1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
28 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
30 /* Assumed in hexadecimal significand parsing, and conversion to
31 hexadecimal strings. */
32 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
33 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
37 /* Represents floating point arithmetic semantics. */
39 /* The largest E such that 2^E is representable; this matches the
40 definition of IEEE 754. */
41 APFloat::ExponentType maxExponent;
43 /* The smallest E such that 2^E is a normalized number; this
44 matches the definition of IEEE 754. */
45 APFloat::ExponentType minExponent;
47 /* Number of bits in the significand. This includes the integer
49 unsigned int precision;
52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
57 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
59 /* The PowerPC format consists of two doubles. It does not map cleanly
60 onto the usual format above. It is approximated using twice the
61 mantissa bits. Note that for exponents near the double minimum,
62 we no longer can represent the full 106 mantissa bits, so those
63 will be treated as denormal numbers.
65 FIXME: While this approximation is equivalent to what GCC uses for
66 compile-time arithmetic on PPC double-double numbers, it is not able
67 to represent all possible values held by a PPC double-double number,
68 for example: (long double) 1.0 + (long double) 0x1p-106
69 Should this be replaced by a full emulation of PPC double-double? */
70 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
72 /* A tight upper bound on number of parts required to hold the value
75 power * 815 / (351 * integerPartWidth) + 1
77 However, whilst the result may require only this many parts,
78 because we are multiplying two values to get it, the
79 multiplication may require an extra part with the excess part
80 being zero (consider the trivial case of 1 * 1, tcFullMultiply
81 requires two parts to hold the single-part result). So we add an
82 extra one to guarantee enough space whilst multiplying. */
83 const unsigned int maxExponent = 16383;
84 const unsigned int maxPrecision = 113;
85 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
86 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
87 / (351 * integerPartWidth));
90 /* A bunch of private, handy routines. */
92 static inline unsigned int
93 partCountForBits(unsigned int bits)
95 return ((bits) + integerPartWidth - 1) / integerPartWidth;
98 /* Returns 0U-9U. Return values >= 10U are not digits. */
99 static inline unsigned int
100 decDigitValue(unsigned int c)
105 /* Return the value of a decimal exponent of the form
108 If the exponent overflows, returns a large exponent with the
111 readExponent(StringRef::iterator begin, StringRef::iterator end)
114 unsigned int absExponent;
115 const unsigned int overlargeExponent = 24000; /* FIXME. */
116 StringRef::iterator p = begin;
118 assert(p != end && "Exponent has no digits");
120 isNegative = (*p == '-');
121 if (*p == '-' || *p == '+') {
123 assert(p != end && "Exponent has no digits");
126 absExponent = decDigitValue(*p++);
127 assert(absExponent < 10U && "Invalid character in exponent");
129 for (; p != end; ++p) {
132 value = decDigitValue(*p);
133 assert(value < 10U && "Invalid character in exponent");
135 value += absExponent * 10;
136 if (absExponent >= overlargeExponent) {
137 absExponent = overlargeExponent;
138 p = end; /* outwit assert below */
144 assert(p == end && "Invalid exponent in exponent");
147 return -(int) absExponent;
149 return (int) absExponent;
152 /* This is ugly and needs cleaning up, but I don't immediately see
153 how whilst remaining safe. */
155 totalExponent(StringRef::iterator p, StringRef::iterator end,
156 int exponentAdjustment)
158 int unsignedExponent;
159 bool negative, overflow;
162 assert(p != end && "Exponent has no digits");
164 negative = *p == '-';
165 if (*p == '-' || *p == '+') {
167 assert(p != end && "Exponent has no digits");
170 unsignedExponent = 0;
172 for (; p != end; ++p) {
175 value = decDigitValue(*p);
176 assert(value < 10U && "Invalid character in exponent");
178 unsignedExponent = unsignedExponent * 10 + value;
179 if (unsignedExponent > 32767) {
185 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
189 exponent = unsignedExponent;
191 exponent = -exponent;
192 exponent += exponentAdjustment;
193 if (exponent > 32767 || exponent < -32768)
198 exponent = negative ? -32768: 32767;
203 static StringRef::iterator
204 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
205 StringRef::iterator *dot)
207 StringRef::iterator p = begin;
209 while (*p == '0' && p != end)
215 assert(end - begin != 1 && "Significand has no digits");
217 while (*p == '0' && p != end)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
234 If the value is zero, V->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
238 const char *firstSigDigit;
239 const char *lastSigDigit;
241 int normalizedExponent;
245 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
248 StringRef::iterator dot = end;
249 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
251 D->firstSigDigit = p;
253 D->normalizedExponent = 0;
255 for (; p != end; ++p) {
257 assert(dot == end && "String contains multiple dots");
262 if (decDigitValue(*p) >= 10U)
267 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
268 assert(p != begin && "Significand has no digits");
269 assert((dot == end || p - begin != 1) && "Significand has no digits");
271 /* p points to the first non-digit in the string */
272 D->exponent = readExponent(p + 1, end);
274 /* Implied decimal point? */
279 /* If number is all zeroes accept any exponent. */
280 if (p != D->firstSigDigit) {
281 /* Drop insignificant trailing zeroes. */
286 while (p != begin && *p == '0');
287 while (p != begin && *p == '.');
290 /* Adjust the exponents for any decimal point. */
291 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
292 D->normalizedExponent = (D->exponent +
293 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
294 - (dot > D->firstSigDigit && dot < p)));
300 /* Return the trailing fraction of a hexadecimal number.
301 DIGITVALUE is the first hex digit of the fraction, P points to
304 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
305 unsigned int digitValue)
307 unsigned int hexDigit;
309 /* If the first trailing digit isn't 0 or 8 we can work out the
310 fraction immediately. */
312 return lfMoreThanHalf;
313 else if (digitValue < 8 && digitValue > 0)
314 return lfLessThanHalf;
316 /* Otherwise we need to find the first non-zero digit. */
320 assert(p != end && "Invalid trailing hexadecimal fraction!");
322 hexDigit = hexDigitValue(*p);
324 /* If we ran off the end it is exactly zero or one-half, otherwise
327 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
329 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
332 /* Return the fraction lost were a bignum truncated losing the least
333 significant BITS bits. */
335 lostFractionThroughTruncation(const integerPart *parts,
336 unsigned int partCount,
341 lsb = APInt::tcLSB(parts, partCount);
343 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
345 return lfExactlyZero;
347 return lfExactlyHalf;
348 if (bits <= partCount * integerPartWidth &&
349 APInt::tcExtractBit(parts, bits - 1))
350 return lfMoreThanHalf;
352 return lfLessThanHalf;
355 /* Shift DST right BITS bits noting lost fraction. */
357 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
359 lostFraction lost_fraction;
361 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
363 APInt::tcShiftRight(dst, parts, bits);
365 return lost_fraction;
368 /* Combine the effect of two lost fractions. */
370 combineLostFractions(lostFraction moreSignificant,
371 lostFraction lessSignificant)
373 if (lessSignificant != lfExactlyZero) {
374 if (moreSignificant == lfExactlyZero)
375 moreSignificant = lfLessThanHalf;
376 else if (moreSignificant == lfExactlyHalf)
377 moreSignificant = lfMoreThanHalf;
380 return moreSignificant;
383 /* The error from the true value, in half-ulps, on multiplying two
384 floating point numbers, which differ from the value they
385 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
386 than the returned value.
388 See "How to Read Floating Point Numbers Accurately" by William D
391 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
393 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
395 if (HUerr1 + HUerr2 == 0)
396 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
398 return inexactMultiply + 2 * (HUerr1 + HUerr2);
401 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
402 when the least significant BITS are truncated. BITS cannot be
405 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
407 unsigned int count, partBits;
408 integerPart part, boundary;
413 count = bits / integerPartWidth;
414 partBits = bits % integerPartWidth + 1;
416 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
419 boundary = (integerPart) 1 << (partBits - 1);
424 if (part - boundary <= boundary - part)
425 return part - boundary;
427 return boundary - part;
430 if (part == boundary) {
433 return ~(integerPart) 0; /* A lot. */
436 } else if (part == boundary - 1) {
439 return ~(integerPart) 0; /* A lot. */
444 return ~(integerPart) 0; /* A lot. */
447 /* Place pow(5, power) in DST, and return the number of parts used.
448 DST must be at least one part larger than size of the answer. */
450 powerOf5(integerPart *dst, unsigned int power)
452 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
454 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
455 pow5s[0] = 78125 * 5;
457 unsigned int partsCount[16] = { 1 };
458 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
460 assert(power <= maxExponent);
465 *p1 = firstEightPowers[power & 7];
471 for (unsigned int n = 0; power; power >>= 1, n++) {
476 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
478 pc = partsCount[n - 1];
479 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
481 if (pow5[pc - 1] == 0)
489 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
491 if (p2[result - 1] == 0)
494 /* Now result is in p1 with partsCount parts and p2 is scratch
496 tmp = p1, p1 = p2, p2 = tmp;
503 APInt::tcAssign(dst, p1, result);
508 /* Zero at the end to avoid modular arithmetic when adding one; used
509 when rounding up during hexadecimal output. */
510 static const char hexDigitsLower[] = "0123456789abcdef0";
511 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
512 static const char infinityL[] = "infinity";
513 static const char infinityU[] = "INFINITY";
514 static const char NaNL[] = "nan";
515 static const char NaNU[] = "NAN";
517 /* Write out an integerPart in hexadecimal, starting with the most
518 significant nibble. Write out exactly COUNT hexdigits, return
521 partAsHex (char *dst, integerPart part, unsigned int count,
522 const char *hexDigitChars)
524 unsigned int result = count;
526 assert(count != 0 && count <= integerPartWidth / 4);
528 part >>= (integerPartWidth - 4 * count);
530 dst[count] = hexDigitChars[part & 0xf];
537 /* Write out an unsigned decimal integer. */
539 writeUnsignedDecimal (char *dst, unsigned int n)
555 /* Write out a signed decimal integer. */
557 writeSignedDecimal (char *dst, int value)
561 dst = writeUnsignedDecimal(dst, -(unsigned) value);
563 dst = writeUnsignedDecimal(dst, value);
570 APFloat::initialize(const fltSemantics *ourSemantics)
574 semantics = ourSemantics;
577 significand.parts = new integerPart[count];
581 APFloat::freeSignificand()
584 delete [] significand.parts;
588 APFloat::assign(const APFloat &rhs)
590 assert(semantics == rhs.semantics);
593 category = rhs.category;
594 exponent = rhs.exponent;
595 if (category == fcNormal || category == fcNaN)
596 copySignificand(rhs);
600 APFloat::copySignificand(const APFloat &rhs)
602 assert(category == fcNormal || category == fcNaN);
603 assert(rhs.partCount() >= partCount());
605 APInt::tcAssign(significandParts(), rhs.significandParts(),
609 /* Make this number a NaN, with an arbitrary but deterministic value
610 for the significand. If double or longer, this is a signalling NaN,
611 which may not be ideal. If float, this is QNaN(0). */
612 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
617 integerPart *significand = significandParts();
618 unsigned numParts = partCount();
620 // Set the significand bits to the fill.
621 if (!fill || fill->getNumWords() < numParts)
622 APInt::tcSet(significand, 0, numParts);
624 APInt::tcAssign(significand, fill->getRawData(),
625 std::min(fill->getNumWords(), numParts));
627 // Zero out the excess bits of the significand.
628 unsigned bitsToPreserve = semantics->precision - 1;
629 unsigned part = bitsToPreserve / 64;
630 bitsToPreserve %= 64;
631 significand[part] &= ((1ULL << bitsToPreserve) - 1);
632 for (part++; part != numParts; ++part)
633 significand[part] = 0;
636 unsigned QNaNBit = semantics->precision - 2;
639 // We always have to clear the QNaN bit to make it an SNaN.
640 APInt::tcClearBit(significand, QNaNBit);
642 // If there are no bits set in the payload, we have to set
643 // *something* to make it a NaN instead of an infinity;
644 // conventionally, this is the next bit down from the QNaN bit.
645 if (APInt::tcIsZero(significand, numParts))
646 APInt::tcSetBit(significand, QNaNBit - 1);
648 // We always have to set the QNaN bit to make it a QNaN.
649 APInt::tcSetBit(significand, QNaNBit);
652 // For x87 extended precision, we want to make a NaN, not a
653 // pseudo-NaN. Maybe we should expose the ability to make
655 if (semantics == &APFloat::x87DoubleExtended)
656 APInt::tcSetBit(significand, QNaNBit + 1);
659 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
661 APFloat value(Sem, uninitialized);
662 value.makeNaN(SNaN, Negative, fill);
667 APFloat::operator=(const APFloat &rhs)
670 if (semantics != rhs.semantics) {
672 initialize(rhs.semantics);
681 APFloat::isDenormal() const {
682 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
683 (APInt::tcExtractBit(significandParts(),
684 semantics->precision - 1) == 0);
688 APFloat::isSmallest() const {
689 // The smallest number by magnitude in our format will be the smallest
690 // denormal, i.e. the floating point number with exponent being minimum
691 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
692 return isFiniteNonZero() && exponent == semantics->minExponent &&
693 significandMSB() == 0;
696 bool APFloat::isSignificandAllOnes() const {
697 // Test if the significand excluding the integral bit is all ones. This allows
698 // us to test for binade boundaries.
699 const integerPart *Parts = significandParts();
700 const unsigned PartCount = partCount();
701 for (unsigned i = 0; i < PartCount - 1; i++)
705 // Set the unused high bits to all ones when we compare.
706 const unsigned NumHighBits =
707 PartCount*integerPartWidth - semantics->precision + 1;
708 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
709 "fill than integerPartWidth");
710 const integerPart HighBitFill =
711 ~integerPart(0) << (integerPartWidth - NumHighBits);
712 if (~(Parts[PartCount - 1] | HighBitFill))
718 bool APFloat::isSignificandAllZeros() const {
719 // Test if the significand excluding the integral bit is all zeros. This
720 // allows us to test for binade boundaries.
721 const integerPart *Parts = significandParts();
722 const unsigned PartCount = partCount();
724 for (unsigned i = 0; i < PartCount - 1; i++)
728 const unsigned NumHighBits =
729 PartCount*integerPartWidth - semantics->precision + 1;
730 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
731 "clear than integerPartWidth");
732 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
734 if (Parts[PartCount - 1] & HighBitMask)
741 APFloat::isLargest() const {
742 // The largest number by magnitude in our format will be the floating point
743 // number with maximum exponent and with significand that is all ones.
744 return isFiniteNonZero() && exponent == semantics->maxExponent
745 && isSignificandAllOnes();
749 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
752 if (semantics != rhs.semantics ||
753 category != rhs.category ||
756 if (category==fcZero || category==fcInfinity)
758 else if (category==fcNormal && exponent!=rhs.exponent)
762 const integerPart* p=significandParts();
763 const integerPart* q=rhs.significandParts();
764 for (; i>0; i--, p++, q++) {
772 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
773 initialize(&ourSemantics);
776 exponent = ourSemantics.precision - 1;
777 significandParts()[0] = value;
778 normalize(rmNearestTiesToEven, lfExactlyZero);
781 APFloat::APFloat(const fltSemantics &ourSemantics) {
782 initialize(&ourSemantics);
787 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
788 // Allocates storage if necessary but does not initialize it.
789 initialize(&ourSemantics);
792 APFloat::APFloat(const fltSemantics &ourSemantics,
793 fltCategory ourCategory, bool negative) {
794 initialize(&ourSemantics);
795 category = ourCategory;
797 if (category == fcNormal)
799 else if (ourCategory == fcNaN)
803 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
804 initialize(&ourSemantics);
805 convertFromString(text, rmNearestTiesToEven);
808 APFloat::APFloat(const APFloat &rhs) {
809 initialize(rhs.semantics);
818 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
819 void APFloat::Profile(FoldingSetNodeID& ID) const {
820 ID.Add(bitcastToAPInt());
824 APFloat::partCount() const
826 return partCountForBits(semantics->precision + 1);
830 APFloat::semanticsPrecision(const fltSemantics &semantics)
832 return semantics.precision;
836 APFloat::significandParts() const
838 return const_cast<APFloat *>(this)->significandParts();
842 APFloat::significandParts()
844 assert(category == fcNormal || category == fcNaN);
847 return significand.parts;
849 return &significand.part;
853 APFloat::zeroSignificand()
856 APInt::tcSet(significandParts(), 0, partCount());
859 /* Increment an fcNormal floating point number's significand. */
861 APFloat::incrementSignificand()
865 carry = APInt::tcIncrement(significandParts(), partCount());
867 /* Our callers should never cause us to overflow. */
872 /* Add the significand of the RHS. Returns the carry flag. */
874 APFloat::addSignificand(const APFloat &rhs)
878 parts = significandParts();
880 assert(semantics == rhs.semantics);
881 assert(exponent == rhs.exponent);
883 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
886 /* Subtract the significand of the RHS with a borrow flag. Returns
889 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
893 parts = significandParts();
895 assert(semantics == rhs.semantics);
896 assert(exponent == rhs.exponent);
898 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
902 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
903 on to the full-precision result of the multiplication. Returns the
906 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
908 unsigned int omsb; // One, not zero, based MSB.
909 unsigned int partsCount, newPartsCount, precision;
910 integerPart *lhsSignificand;
911 integerPart scratch[4];
912 integerPart *fullSignificand;
913 lostFraction lost_fraction;
916 assert(semantics == rhs.semantics);
918 precision = semantics->precision;
919 newPartsCount = partCountForBits(precision * 2);
921 if (newPartsCount > 4)
922 fullSignificand = new integerPart[newPartsCount];
924 fullSignificand = scratch;
926 lhsSignificand = significandParts();
927 partsCount = partCount();
929 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
930 rhs.significandParts(), partsCount, partsCount);
932 lost_fraction = lfExactlyZero;
933 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
934 exponent += rhs.exponent;
936 // Assume the operands involved in the multiplication are single-precision
937 // FP, and the two multiplicants are:
938 // *this = a23 . a22 ... a0 * 2^e1
939 // rhs = b23 . b22 ... b0 * 2^e2
940 // the result of multiplication is:
941 // *this = c47 c46 . c45 ... c0 * 2^(e1+e2)
942 // Note that there are two significant bits at the left-hand side of the
943 // radix point. Move the radix point toward left by one bit, and adjust
944 // exponent accordingly.
948 // The intermediate result of the multiplication has "2 * precision"
949 // signicant bit; adjust the addend to be consistent with mul result.
951 Significand savedSignificand = significand;
952 const fltSemantics *savedSemantics = semantics;
953 fltSemantics extendedSemantics;
955 unsigned int extendedPrecision;
957 /* Normalize our MSB. */
958 extendedPrecision = 2 * precision;
959 if (omsb != extendedPrecision) {
960 assert(extendedPrecision > omsb);
961 APInt::tcShiftLeft(fullSignificand, newPartsCount,
962 extendedPrecision - omsb);
963 exponent -= extendedPrecision - omsb;
966 /* Create new semantics. */
967 extendedSemantics = *semantics;
968 extendedSemantics.precision = extendedPrecision;
970 if (newPartsCount == 1)
971 significand.part = fullSignificand[0];
973 significand.parts = fullSignificand;
974 semantics = &extendedSemantics;
976 APFloat extendedAddend(*addend);
977 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
978 assert(status == opOK);
980 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
982 /* Restore our state. */
983 if (newPartsCount == 1)
984 fullSignificand[0] = significand.part;
985 significand = savedSignificand;
986 semantics = savedSemantics;
988 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
991 // Convert the result having "2 * precision" significant-bits back to the one
992 // having "precision" significant-bits. First, move the radix point from
993 // poision "2*precision - 1" to "precision - 1". The exponent need to be
994 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
995 exponent -= precision;
997 // In case MSB resides at the left-hand side of radix point, shift the
998 // mantissa right by some amount to make sure the MSB reside right before
999 // the radix point (i.e. "MSB . rest-significant-bits").
1001 // Note that the result is not normalized when "omsb < precision". So, the
1002 // caller needs to call APFloat::normalize() if normalized value is expected.
1003 if (omsb > precision) {
1004 unsigned int bits, significantParts;
1007 bits = omsb - precision;
1008 significantParts = partCountForBits(omsb);
1009 lf = shiftRight(fullSignificand, significantParts, bits);
1010 lost_fraction = combineLostFractions(lf, lost_fraction);
1014 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1016 if (newPartsCount > 4)
1017 delete [] fullSignificand;
1019 return lost_fraction;
1022 /* Multiply the significands of LHS and RHS to DST. */
1024 APFloat::divideSignificand(const APFloat &rhs)
1026 unsigned int bit, i, partsCount;
1027 const integerPart *rhsSignificand;
1028 integerPart *lhsSignificand, *dividend, *divisor;
1029 integerPart scratch[4];
1030 lostFraction lost_fraction;
1032 assert(semantics == rhs.semantics);
1034 lhsSignificand = significandParts();
1035 rhsSignificand = rhs.significandParts();
1036 partsCount = partCount();
1039 dividend = new integerPart[partsCount * 2];
1043 divisor = dividend + partsCount;
1045 /* Copy the dividend and divisor as they will be modified in-place. */
1046 for (i = 0; i < partsCount; i++) {
1047 dividend[i] = lhsSignificand[i];
1048 divisor[i] = rhsSignificand[i];
1049 lhsSignificand[i] = 0;
1052 exponent -= rhs.exponent;
1054 unsigned int precision = semantics->precision;
1056 /* Normalize the divisor. */
1057 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1060 APInt::tcShiftLeft(divisor, partsCount, bit);
1063 /* Normalize the dividend. */
1064 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1067 APInt::tcShiftLeft(dividend, partsCount, bit);
1070 /* Ensure the dividend >= divisor initially for the loop below.
1071 Incidentally, this means that the division loop below is
1072 guaranteed to set the integer bit to one. */
1073 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1075 APInt::tcShiftLeft(dividend, partsCount, 1);
1076 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1079 /* Long division. */
1080 for (bit = precision; bit; bit -= 1) {
1081 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1082 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1083 APInt::tcSetBit(lhsSignificand, bit - 1);
1086 APInt::tcShiftLeft(dividend, partsCount, 1);
1089 /* Figure out the lost fraction. */
1090 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1093 lost_fraction = lfMoreThanHalf;
1095 lost_fraction = lfExactlyHalf;
1096 else if (APInt::tcIsZero(dividend, partsCount))
1097 lost_fraction = lfExactlyZero;
1099 lost_fraction = lfLessThanHalf;
1104 return lost_fraction;
1108 APFloat::significandMSB() const
1110 return APInt::tcMSB(significandParts(), partCount());
1114 APFloat::significandLSB() const
1116 return APInt::tcLSB(significandParts(), partCount());
1119 /* Note that a zero result is NOT normalized to fcZero. */
1121 APFloat::shiftSignificandRight(unsigned int bits)
1123 /* Our exponent should not overflow. */
1124 assert((ExponentType) (exponent + bits) >= exponent);
1128 return shiftRight(significandParts(), partCount(), bits);
1131 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1133 APFloat::shiftSignificandLeft(unsigned int bits)
1135 assert(bits < semantics->precision);
1138 unsigned int partsCount = partCount();
1140 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1143 assert(!APInt::tcIsZero(significandParts(), partsCount));
1148 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1152 assert(semantics == rhs.semantics);
1153 assert(category == fcNormal);
1154 assert(rhs.category == fcNormal);
1156 compare = exponent - rhs.exponent;
1158 /* If exponents are equal, do an unsigned bignum comparison of the
1161 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1165 return cmpGreaterThan;
1166 else if (compare < 0)
1172 /* Handle overflow. Sign is preserved. We either become infinity or
1173 the largest finite number. */
1175 APFloat::handleOverflow(roundingMode rounding_mode)
1178 if (rounding_mode == rmNearestTiesToEven ||
1179 rounding_mode == rmNearestTiesToAway ||
1180 (rounding_mode == rmTowardPositive && !sign) ||
1181 (rounding_mode == rmTowardNegative && sign)) {
1182 category = fcInfinity;
1183 return (opStatus) (opOverflow | opInexact);
1186 /* Otherwise we become the largest finite number. */
1187 category = fcNormal;
1188 exponent = semantics->maxExponent;
1189 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1190 semantics->precision);
1195 /* Returns TRUE if, when truncating the current number, with BIT the
1196 new LSB, with the given lost fraction and rounding mode, the result
1197 would need to be rounded away from zero (i.e., by increasing the
1198 signficand). This routine must work for fcZero of both signs, and
1199 fcNormal numbers. */
1201 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1202 lostFraction lost_fraction,
1203 unsigned int bit) const
1205 /* NaNs and infinities should not have lost fractions. */
1206 assert(category == fcNormal || category == fcZero);
1208 /* Current callers never pass this so we don't handle it. */
1209 assert(lost_fraction != lfExactlyZero);
1211 switch (rounding_mode) {
1212 case rmNearestTiesToAway:
1213 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1215 case rmNearestTiesToEven:
1216 if (lost_fraction == lfMoreThanHalf)
1219 /* Our zeroes don't have a significand to test. */
1220 if (lost_fraction == lfExactlyHalf && category != fcZero)
1221 return APInt::tcExtractBit(significandParts(), bit);
1228 case rmTowardPositive:
1229 return sign == false;
1231 case rmTowardNegative:
1232 return sign == true;
1234 llvm_unreachable("Invalid rounding mode found");
1238 APFloat::normalize(roundingMode rounding_mode,
1239 lostFraction lost_fraction)
1241 unsigned int omsb; /* One, not zero, based MSB. */
1244 if (category != fcNormal)
1247 /* Before rounding normalize the exponent of fcNormal numbers. */
1248 omsb = significandMSB() + 1;
1251 /* OMSB is numbered from 1. We want to place it in the integer
1252 bit numbered PRECISION if possible, with a compensating change in
1254 exponentChange = omsb - semantics->precision;
1256 /* If the resulting exponent is too high, overflow according to
1257 the rounding mode. */
1258 if (exponent + exponentChange > semantics->maxExponent)
1259 return handleOverflow(rounding_mode);
1261 /* Subnormal numbers have exponent minExponent, and their MSB
1262 is forced based on that. */
1263 if (exponent + exponentChange < semantics->minExponent)
1264 exponentChange = semantics->minExponent - exponent;
1266 /* Shifting left is easy as we don't lose precision. */
1267 if (exponentChange < 0) {
1268 assert(lost_fraction == lfExactlyZero);
1270 shiftSignificandLeft(-exponentChange);
1275 if (exponentChange > 0) {
1278 /* Shift right and capture any new lost fraction. */
1279 lf = shiftSignificandRight(exponentChange);
1281 lost_fraction = combineLostFractions(lf, lost_fraction);
1283 /* Keep OMSB up-to-date. */
1284 if (omsb > (unsigned) exponentChange)
1285 omsb -= exponentChange;
1291 /* Now round the number according to rounding_mode given the lost
1294 /* As specified in IEEE 754, since we do not trap we do not report
1295 underflow for exact results. */
1296 if (lost_fraction == lfExactlyZero) {
1297 /* Canonicalize zeroes. */
1304 /* Increment the significand if we're rounding away from zero. */
1305 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1307 exponent = semantics->minExponent;
1309 incrementSignificand();
1310 omsb = significandMSB() + 1;
1312 /* Did the significand increment overflow? */
1313 if (omsb == (unsigned) semantics->precision + 1) {
1314 /* Renormalize by incrementing the exponent and shifting our
1315 significand right one. However if we already have the
1316 maximum exponent we overflow to infinity. */
1317 if (exponent == semantics->maxExponent) {
1318 category = fcInfinity;
1320 return (opStatus) (opOverflow | opInexact);
1323 shiftSignificandRight(1);
1329 /* The normal case - we were and are not denormal, and any
1330 significand increment above didn't overflow. */
1331 if (omsb == semantics->precision)
1334 /* We have a non-zero denormal. */
1335 assert(omsb < semantics->precision);
1337 /* Canonicalize zeroes. */
1341 /* The fcZero case is a denormal that underflowed to zero. */
1342 return (opStatus) (opUnderflow | opInexact);
1346 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1348 switch (convolve(category, rhs.category)) {
1350 llvm_unreachable(0);
1352 case convolve(fcNaN, fcZero):
1353 case convolve(fcNaN, fcNormal):
1354 case convolve(fcNaN, fcInfinity):
1355 case convolve(fcNaN, fcNaN):
1356 case convolve(fcNormal, fcZero):
1357 case convolve(fcInfinity, fcNormal):
1358 case convolve(fcInfinity, fcZero):
1361 case convolve(fcZero, fcNaN):
1362 case convolve(fcNormal, fcNaN):
1363 case convolve(fcInfinity, fcNaN):
1365 copySignificand(rhs);
1368 case convolve(fcNormal, fcInfinity):
1369 case convolve(fcZero, fcInfinity):
1370 category = fcInfinity;
1371 sign = rhs.sign ^ subtract;
1374 case convolve(fcZero, fcNormal):
1376 sign = rhs.sign ^ subtract;
1379 case convolve(fcZero, fcZero):
1380 /* Sign depends on rounding mode; handled by caller. */
1383 case convolve(fcInfinity, fcInfinity):
1384 /* Differently signed infinities can only be validly
1386 if (((sign ^ rhs.sign)!=0) != subtract) {
1393 case convolve(fcNormal, fcNormal):
1398 /* Add or subtract two normal numbers. */
1400 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1403 lostFraction lost_fraction;
1406 /* Determine if the operation on the absolute values is effectively
1407 an addition or subtraction. */
1408 subtract ^= (sign ^ rhs.sign) ? true : false;
1410 /* Are we bigger exponent-wise than the RHS? */
1411 bits = exponent - rhs.exponent;
1413 /* Subtraction is more subtle than one might naively expect. */
1415 APFloat temp_rhs(rhs);
1419 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1420 lost_fraction = lfExactlyZero;
1421 } else if (bits > 0) {
1422 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1423 shiftSignificandLeft(1);
1426 lost_fraction = shiftSignificandRight(-bits - 1);
1427 temp_rhs.shiftSignificandLeft(1);
1432 carry = temp_rhs.subtractSignificand
1433 (*this, lost_fraction != lfExactlyZero);
1434 copySignificand(temp_rhs);
1437 carry = subtractSignificand
1438 (temp_rhs, lost_fraction != lfExactlyZero);
1441 /* Invert the lost fraction - it was on the RHS and
1443 if (lost_fraction == lfLessThanHalf)
1444 lost_fraction = lfMoreThanHalf;
1445 else if (lost_fraction == lfMoreThanHalf)
1446 lost_fraction = lfLessThanHalf;
1448 /* The code above is intended to ensure that no borrow is
1454 APFloat temp_rhs(rhs);
1456 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1457 carry = addSignificand(temp_rhs);
1459 lost_fraction = shiftSignificandRight(-bits);
1460 carry = addSignificand(rhs);
1463 /* We have a guard bit; generating a carry cannot happen. */
1468 return lost_fraction;
1472 APFloat::multiplySpecials(const APFloat &rhs)
1474 switch (convolve(category, rhs.category)) {
1476 llvm_unreachable(0);
1478 case convolve(fcNaN, fcZero):
1479 case convolve(fcNaN, fcNormal):
1480 case convolve(fcNaN, fcInfinity):
1481 case convolve(fcNaN, fcNaN):
1484 case convolve(fcZero, fcNaN):
1485 case convolve(fcNormal, fcNaN):
1486 case convolve(fcInfinity, fcNaN):
1488 copySignificand(rhs);
1491 case convolve(fcNormal, fcInfinity):
1492 case convolve(fcInfinity, fcNormal):
1493 case convolve(fcInfinity, fcInfinity):
1494 category = fcInfinity;
1497 case convolve(fcZero, fcNormal):
1498 case convolve(fcNormal, fcZero):
1499 case convolve(fcZero, fcZero):
1503 case convolve(fcZero, fcInfinity):
1504 case convolve(fcInfinity, fcZero):
1508 case convolve(fcNormal, fcNormal):
1514 APFloat::divideSpecials(const APFloat &rhs)
1516 switch (convolve(category, rhs.category)) {
1518 llvm_unreachable(0);
1520 case convolve(fcNaN, fcZero):
1521 case convolve(fcNaN, fcNormal):
1522 case convolve(fcNaN, fcInfinity):
1523 case convolve(fcNaN, fcNaN):
1524 case convolve(fcInfinity, fcZero):
1525 case convolve(fcInfinity, fcNormal):
1526 case convolve(fcZero, fcInfinity):
1527 case convolve(fcZero, fcNormal):
1530 case convolve(fcZero, fcNaN):
1531 case convolve(fcNormal, fcNaN):
1532 case convolve(fcInfinity, fcNaN):
1534 copySignificand(rhs);
1537 case convolve(fcNormal, fcInfinity):
1541 case convolve(fcNormal, fcZero):
1542 category = fcInfinity;
1545 case convolve(fcInfinity, fcInfinity):
1546 case convolve(fcZero, fcZero):
1550 case convolve(fcNormal, fcNormal):
1556 APFloat::modSpecials(const APFloat &rhs)
1558 switch (convolve(category, rhs.category)) {
1560 llvm_unreachable(0);
1562 case convolve(fcNaN, fcZero):
1563 case convolve(fcNaN, fcNormal):
1564 case convolve(fcNaN, fcInfinity):
1565 case convolve(fcNaN, fcNaN):
1566 case convolve(fcZero, fcInfinity):
1567 case convolve(fcZero, fcNormal):
1568 case convolve(fcNormal, fcInfinity):
1571 case convolve(fcZero, fcNaN):
1572 case convolve(fcNormal, fcNaN):
1573 case convolve(fcInfinity, fcNaN):
1575 copySignificand(rhs);
1578 case convolve(fcNormal, fcZero):
1579 case convolve(fcInfinity, fcZero):
1580 case convolve(fcInfinity, fcNormal):
1581 case convolve(fcInfinity, fcInfinity):
1582 case convolve(fcZero, fcZero):
1586 case convolve(fcNormal, fcNormal):
1593 APFloat::changeSign()
1595 /* Look mummy, this one's easy. */
1600 APFloat::clearSign()
1602 /* So is this one. */
1607 APFloat::copySign(const APFloat &rhs)
1613 /* Normalized addition or subtraction. */
1615 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1620 fs = addOrSubtractSpecials(rhs, subtract);
1622 /* This return code means it was not a simple case. */
1623 if (fs == opDivByZero) {
1624 lostFraction lost_fraction;
1626 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1627 fs = normalize(rounding_mode, lost_fraction);
1629 /* Can only be zero if we lost no fraction. */
1630 assert(category != fcZero || lost_fraction == lfExactlyZero);
1633 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1634 positive zero unless rounding to minus infinity, except that
1635 adding two like-signed zeroes gives that zero. */
1636 if (category == fcZero) {
1637 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1638 sign = (rounding_mode == rmTowardNegative);
1644 /* Normalized addition. */
1646 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1648 return addOrSubtract(rhs, rounding_mode, false);
1651 /* Normalized subtraction. */
1653 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1655 return addOrSubtract(rhs, rounding_mode, true);
1658 /* Normalized multiply. */
1660 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1665 fs = multiplySpecials(rhs);
1667 if (category == fcNormal) {
1668 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1669 fs = normalize(rounding_mode, lost_fraction);
1670 if (lost_fraction != lfExactlyZero)
1671 fs = (opStatus) (fs | opInexact);
1677 /* Normalized divide. */
1679 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1684 fs = divideSpecials(rhs);
1686 if (category == fcNormal) {
1687 lostFraction lost_fraction = divideSignificand(rhs);
1688 fs = normalize(rounding_mode, lost_fraction);
1689 if (lost_fraction != lfExactlyZero)
1690 fs = (opStatus) (fs | opInexact);
1696 /* Normalized remainder. This is not currently correct in all cases. */
1698 APFloat::remainder(const APFloat &rhs)
1702 unsigned int origSign = sign;
1704 fs = V.divide(rhs, rmNearestTiesToEven);
1705 if (fs == opDivByZero)
1708 int parts = partCount();
1709 integerPart *x = new integerPart[parts];
1711 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1712 rmNearestTiesToEven, &ignored);
1713 if (fs==opInvalidOp)
1716 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1717 rmNearestTiesToEven);
1718 assert(fs==opOK); // should always work
1720 fs = V.multiply(rhs, rmNearestTiesToEven);
1721 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1723 fs = subtract(V, rmNearestTiesToEven);
1724 assert(fs==opOK || fs==opInexact); // likewise
1727 sign = origSign; // IEEE754 requires this
1732 /* Normalized llvm frem (C fmod).
1733 This is not currently correct in all cases. */
1735 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1738 fs = modSpecials(rhs);
1740 if (category == fcNormal && rhs.category == fcNormal) {
1742 unsigned int origSign = sign;
1744 fs = V.divide(rhs, rmNearestTiesToEven);
1745 if (fs == opDivByZero)
1748 int parts = partCount();
1749 integerPart *x = new integerPart[parts];
1751 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1752 rmTowardZero, &ignored);
1753 if (fs==opInvalidOp)
1756 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1757 rmNearestTiesToEven);
1758 assert(fs==opOK); // should always work
1760 fs = V.multiply(rhs, rounding_mode);
1761 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1763 fs = subtract(V, rounding_mode);
1764 assert(fs==opOK || fs==opInexact); // likewise
1767 sign = origSign; // IEEE754 requires this
1773 /* Normalized fused-multiply-add. */
1775 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1776 const APFloat &addend,
1777 roundingMode rounding_mode)
1781 /* Post-multiplication sign, before addition. */
1782 sign ^= multiplicand.sign;
1784 /* If and only if all arguments are normal do we need to do an
1785 extended-precision calculation. */
1786 if (category == fcNormal &&
1787 multiplicand.category == fcNormal &&
1788 addend.category == fcNormal) {
1789 lostFraction lost_fraction;
1791 lost_fraction = multiplySignificand(multiplicand, &addend);
1792 fs = normalize(rounding_mode, lost_fraction);
1793 if (lost_fraction != lfExactlyZero)
1794 fs = (opStatus) (fs | opInexact);
1796 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1797 positive zero unless rounding to minus infinity, except that
1798 adding two like-signed zeroes gives that zero. */
1799 if (category == fcZero && sign != addend.sign)
1800 sign = (rounding_mode == rmTowardNegative);
1802 fs = multiplySpecials(multiplicand);
1804 /* FS can only be opOK or opInvalidOp. There is no more work
1805 to do in the latter case. The IEEE-754R standard says it is
1806 implementation-defined in this case whether, if ADDEND is a
1807 quiet NaN, we raise invalid op; this implementation does so.
1809 If we need to do the addition we can do so with normal
1812 fs = addOrSubtract(addend, rounding_mode, false);
1818 /* Rounding-mode corrrect round to integral value. */
1819 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1822 // If the exponent is large enough, we know that this value is already
1823 // integral, and the arithmetic below would potentially cause it to saturate
1824 // to +/-Inf. Bail out early instead.
1825 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1828 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1829 // precision of our format, and then subtract it back off again. The choice
1830 // of rounding modes for the addition/subtraction determines the rounding mode
1831 // for our integral rounding as well.
1832 // NOTE: When the input value is negative, we do subtraction followed by
1833 // addition instead.
1834 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1835 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1836 APFloat MagicConstant(*semantics);
1837 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1838 rmNearestTiesToEven);
1839 MagicConstant.copySign(*this);
1844 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1845 bool inputSign = isNegative();
1847 fs = add(MagicConstant, rounding_mode);
1848 if (fs != opOK && fs != opInexact)
1851 fs = subtract(MagicConstant, rounding_mode);
1853 // Restore the input sign.
1854 if (inputSign != isNegative())
1861 /* Comparison requires normalized numbers. */
1863 APFloat::compare(const APFloat &rhs) const
1867 assert(semantics == rhs.semantics);
1869 switch (convolve(category, rhs.category)) {
1871 llvm_unreachable(0);
1873 case convolve(fcNaN, fcZero):
1874 case convolve(fcNaN, fcNormal):
1875 case convolve(fcNaN, fcInfinity):
1876 case convolve(fcNaN, fcNaN):
1877 case convolve(fcZero, fcNaN):
1878 case convolve(fcNormal, fcNaN):
1879 case convolve(fcInfinity, fcNaN):
1880 return cmpUnordered;
1882 case convolve(fcInfinity, fcNormal):
1883 case convolve(fcInfinity, fcZero):
1884 case convolve(fcNormal, fcZero):
1888 return cmpGreaterThan;
1890 case convolve(fcNormal, fcInfinity):
1891 case convolve(fcZero, fcInfinity):
1892 case convolve(fcZero, fcNormal):
1894 return cmpGreaterThan;
1898 case convolve(fcInfinity, fcInfinity):
1899 if (sign == rhs.sign)
1904 return cmpGreaterThan;
1906 case convolve(fcZero, fcZero):
1909 case convolve(fcNormal, fcNormal):
1913 /* Two normal numbers. Do they have the same sign? */
1914 if (sign != rhs.sign) {
1916 result = cmpLessThan;
1918 result = cmpGreaterThan;
1920 /* Compare absolute values; invert result if negative. */
1921 result = compareAbsoluteValue(rhs);
1924 if (result == cmpLessThan)
1925 result = cmpGreaterThan;
1926 else if (result == cmpGreaterThan)
1927 result = cmpLessThan;
1934 /// APFloat::convert - convert a value of one floating point type to another.
1935 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1936 /// records whether the transformation lost information, i.e. whether
1937 /// converting the result back to the original type will produce the
1938 /// original value (this is almost the same as return value==fsOK, but there
1939 /// are edge cases where this is not so).
1942 APFloat::convert(const fltSemantics &toSemantics,
1943 roundingMode rounding_mode, bool *losesInfo)
1945 lostFraction lostFraction;
1946 unsigned int newPartCount, oldPartCount;
1949 const fltSemantics &fromSemantics = *semantics;
1951 lostFraction = lfExactlyZero;
1952 newPartCount = partCountForBits(toSemantics.precision + 1);
1953 oldPartCount = partCount();
1954 shift = toSemantics.precision - fromSemantics.precision;
1956 bool X86SpecialNan = false;
1957 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1958 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1959 (!(*significandParts() & 0x8000000000000000ULL) ||
1960 !(*significandParts() & 0x4000000000000000ULL))) {
1961 // x86 has some unusual NaNs which cannot be represented in any other
1962 // format; note them here.
1963 X86SpecialNan = true;
1966 // If this is a truncation, perform the shift before we narrow the storage.
1967 if (shift < 0 && (category==fcNormal || category==fcNaN))
1968 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1970 // Fix the storage so it can hold to new value.
1971 if (newPartCount > oldPartCount) {
1972 // The new type requires more storage; make it available.
1973 integerPart *newParts;
1974 newParts = new integerPart[newPartCount];
1975 APInt::tcSet(newParts, 0, newPartCount);
1976 if (category==fcNormal || category==fcNaN)
1977 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1979 significand.parts = newParts;
1980 } else if (newPartCount == 1 && oldPartCount != 1) {
1981 // Switch to built-in storage for a single part.
1982 integerPart newPart = 0;
1983 if (category==fcNormal || category==fcNaN)
1984 newPart = significandParts()[0];
1986 significand.part = newPart;
1989 // Now that we have the right storage, switch the semantics.
1990 semantics = &toSemantics;
1992 // If this is an extension, perform the shift now that the storage is
1994 if (shift > 0 && (category==fcNormal || category==fcNaN))
1995 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1997 if (category == fcNormal) {
1998 fs = normalize(rounding_mode, lostFraction);
1999 *losesInfo = (fs != opOK);
2000 } else if (category == fcNaN) {
2001 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2003 // For x87 extended precision, we want to make a NaN, not a special NaN if
2004 // the input wasn't special either.
2005 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2006 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2008 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2009 // does not give you back the same bits. This is dubious, and we
2010 // don't currently do it. You're really supposed to get
2011 // an invalid operation signal at runtime, but nobody does that.
2021 /* Convert a floating point number to an integer according to the
2022 rounding mode. If the rounded integer value is out of range this
2023 returns an invalid operation exception and the contents of the
2024 destination parts are unspecified. If the rounded value is in
2025 range but the floating point number is not the exact integer, the C
2026 standard doesn't require an inexact exception to be raised. IEEE
2027 854 does require it so we do that.
2029 Note that for conversions to integer type the C standard requires
2030 round-to-zero to always be used. */
2032 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2034 roundingMode rounding_mode,
2035 bool *isExact) const
2037 lostFraction lost_fraction;
2038 const integerPart *src;
2039 unsigned int dstPartsCount, truncatedBits;
2043 /* Handle the three special cases first. */
2044 if (category == fcInfinity || category == fcNaN)
2047 dstPartsCount = partCountForBits(width);
2049 if (category == fcZero) {
2050 APInt::tcSet(parts, 0, dstPartsCount);
2051 // Negative zero can't be represented as an int.
2056 src = significandParts();
2058 /* Step 1: place our absolute value, with any fraction truncated, in
2061 /* Our absolute value is less than one; truncate everything. */
2062 APInt::tcSet(parts, 0, dstPartsCount);
2063 /* For exponent -1 the integer bit represents .5, look at that.
2064 For smaller exponents leftmost truncated bit is 0. */
2065 truncatedBits = semantics->precision -1U - exponent;
2067 /* We want the most significant (exponent + 1) bits; the rest are
2069 unsigned int bits = exponent + 1U;
2071 /* Hopelessly large in magnitude? */
2075 if (bits < semantics->precision) {
2076 /* We truncate (semantics->precision - bits) bits. */
2077 truncatedBits = semantics->precision - bits;
2078 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2080 /* We want at least as many bits as are available. */
2081 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2082 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2087 /* Step 2: work out any lost fraction, and increment the absolute
2088 value if we would round away from zero. */
2089 if (truncatedBits) {
2090 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2092 if (lost_fraction != lfExactlyZero &&
2093 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2094 if (APInt::tcIncrement(parts, dstPartsCount))
2095 return opInvalidOp; /* Overflow. */
2098 lost_fraction = lfExactlyZero;
2101 /* Step 3: check if we fit in the destination. */
2102 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2106 /* Negative numbers cannot be represented as unsigned. */
2110 /* It takes omsb bits to represent the unsigned integer value.
2111 We lose a bit for the sign, but care is needed as the
2112 maximally negative integer is a special case. */
2113 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2116 /* This case can happen because of rounding. */
2121 APInt::tcNegate (parts, dstPartsCount);
2123 if (omsb >= width + !isSigned)
2127 if (lost_fraction == lfExactlyZero) {
2134 /* Same as convertToSignExtendedInteger, except we provide
2135 deterministic values in case of an invalid operation exception,
2136 namely zero for NaNs and the minimal or maximal value respectively
2137 for underflow or overflow.
2138 The *isExact output tells whether the result is exact, in the sense
2139 that converting it back to the original floating point type produces
2140 the original value. This is almost equivalent to result==opOK,
2141 except for negative zeroes.
2144 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2146 roundingMode rounding_mode, bool *isExact) const
2150 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2153 if (fs == opInvalidOp) {
2154 unsigned int bits, dstPartsCount;
2156 dstPartsCount = partCountForBits(width);
2158 if (category == fcNaN)
2163 bits = width - isSigned;
2165 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2166 if (sign && isSigned)
2167 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2173 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2174 an APSInt, whose initial bit-width and signed-ness are used to determine the
2175 precision of the conversion.
2178 APFloat::convertToInteger(APSInt &result,
2179 roundingMode rounding_mode, bool *isExact) const
2181 unsigned bitWidth = result.getBitWidth();
2182 SmallVector<uint64_t, 4> parts(result.getNumWords());
2183 opStatus status = convertToInteger(
2184 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2185 // Keeps the original signed-ness.
2186 result = APInt(bitWidth, parts);
2190 /* Convert an unsigned integer SRC to a floating point number,
2191 rounding according to ROUNDING_MODE. The sign of the floating
2192 point number is not modified. */
2194 APFloat::convertFromUnsignedParts(const integerPart *src,
2195 unsigned int srcCount,
2196 roundingMode rounding_mode)
2198 unsigned int omsb, precision, dstCount;
2200 lostFraction lost_fraction;
2202 category = fcNormal;
2203 omsb = APInt::tcMSB(src, srcCount) + 1;
2204 dst = significandParts();
2205 dstCount = partCount();
2206 precision = semantics->precision;
2208 /* We want the most significant PRECISION bits of SRC. There may not
2209 be that many; extract what we can. */
2210 if (precision <= omsb) {
2211 exponent = omsb - 1;
2212 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2214 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2216 exponent = precision - 1;
2217 lost_fraction = lfExactlyZero;
2218 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2221 return normalize(rounding_mode, lost_fraction);
2225 APFloat::convertFromAPInt(const APInt &Val,
2227 roundingMode rounding_mode)
2229 unsigned int partCount = Val.getNumWords();
2233 if (isSigned && api.isNegative()) {
2238 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2241 /* Convert a two's complement integer SRC to a floating point number,
2242 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2243 integer is signed, in which case it must be sign-extended. */
2245 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2246 unsigned int srcCount,
2248 roundingMode rounding_mode)
2253 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2256 /* If we're signed and negative negate a copy. */
2258 copy = new integerPart[srcCount];
2259 APInt::tcAssign(copy, src, srcCount);
2260 APInt::tcNegate(copy, srcCount);
2261 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2265 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2271 /* FIXME: should this just take a const APInt reference? */
2273 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2274 unsigned int width, bool isSigned,
2275 roundingMode rounding_mode)
2277 unsigned int partCount = partCountForBits(width);
2278 APInt api = APInt(width, makeArrayRef(parts, partCount));
2281 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2286 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2290 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2292 lostFraction lost_fraction = lfExactlyZero;
2293 integerPart *significand;
2294 unsigned int bitPos, partsCount;
2295 StringRef::iterator dot, firstSignificantDigit;
2299 category = fcNormal;
2301 significand = significandParts();
2302 partsCount = partCount();
2303 bitPos = partsCount * integerPartWidth;
2305 /* Skip leading zeroes and any (hexa)decimal point. */
2306 StringRef::iterator begin = s.begin();
2307 StringRef::iterator end = s.end();
2308 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2309 firstSignificantDigit = p;
2312 integerPart hex_value;
2315 assert(dot == end && "String contains multiple dots");
2322 hex_value = hexDigitValue(*p);
2323 if (hex_value == -1U) {
2332 /* Store the number whilst 4-bit nibbles remain. */
2335 hex_value <<= bitPos % integerPartWidth;
2336 significand[bitPos / integerPartWidth] |= hex_value;
2338 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2339 while (p != end && hexDigitValue(*p) != -1U)
2346 /* Hex floats require an exponent but not a hexadecimal point. */
2347 assert(p != end && "Hex strings require an exponent");
2348 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2349 assert(p != begin && "Significand has no digits");
2350 assert((dot == end || p - begin != 1) && "Significand has no digits");
2352 /* Ignore the exponent if we are zero. */
2353 if (p != firstSignificantDigit) {
2356 /* Implicit hexadecimal point? */
2360 /* Calculate the exponent adjustment implicit in the number of
2361 significant digits. */
2362 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2363 if (expAdjustment < 0)
2365 expAdjustment = expAdjustment * 4 - 1;
2367 /* Adjust for writing the significand starting at the most
2368 significant nibble. */
2369 expAdjustment += semantics->precision;
2370 expAdjustment -= partsCount * integerPartWidth;
2372 /* Adjust for the given exponent. */
2373 exponent = totalExponent(p + 1, end, expAdjustment);
2376 return normalize(rounding_mode, lost_fraction);
2380 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2381 unsigned sigPartCount, int exp,
2382 roundingMode rounding_mode)
2384 unsigned int parts, pow5PartCount;
2385 fltSemantics calcSemantics = { 32767, -32767, 0 };
2386 integerPart pow5Parts[maxPowerOfFiveParts];
2389 isNearest = (rounding_mode == rmNearestTiesToEven ||
2390 rounding_mode == rmNearestTiesToAway);
2392 parts = partCountForBits(semantics->precision + 11);
2394 /* Calculate pow(5, abs(exp)). */
2395 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2397 for (;; parts *= 2) {
2398 opStatus sigStatus, powStatus;
2399 unsigned int excessPrecision, truncatedBits;
2401 calcSemantics.precision = parts * integerPartWidth - 1;
2402 excessPrecision = calcSemantics.precision - semantics->precision;
2403 truncatedBits = excessPrecision;
2405 APFloat decSig(calcSemantics, fcZero, sign);
2406 APFloat pow5(calcSemantics, fcZero, false);
2408 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2409 rmNearestTiesToEven);
2410 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2411 rmNearestTiesToEven);
2412 /* Add exp, as 10^n = 5^n * 2^n. */
2413 decSig.exponent += exp;
2415 lostFraction calcLostFraction;
2416 integerPart HUerr, HUdistance;
2417 unsigned int powHUerr;
2420 /* multiplySignificand leaves the precision-th bit set to 1. */
2421 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2422 powHUerr = powStatus != opOK;
2424 calcLostFraction = decSig.divideSignificand(pow5);
2425 /* Denormal numbers have less precision. */
2426 if (decSig.exponent < semantics->minExponent) {
2427 excessPrecision += (semantics->minExponent - decSig.exponent);
2428 truncatedBits = excessPrecision;
2429 if (excessPrecision > calcSemantics.precision)
2430 excessPrecision = calcSemantics.precision;
2432 /* Extra half-ulp lost in reciprocal of exponent. */
2433 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2436 /* Both multiplySignificand and divideSignificand return the
2437 result with the integer bit set. */
2438 assert(APInt::tcExtractBit
2439 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2441 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2443 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2444 excessPrecision, isNearest);
2446 /* Are we guaranteed to round correctly if we truncate? */
2447 if (HUdistance >= HUerr) {
2448 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2449 calcSemantics.precision - excessPrecision,
2451 /* Take the exponent of decSig. If we tcExtract-ed less bits
2452 above we must adjust our exponent to compensate for the
2453 implicit right shift. */
2454 exponent = (decSig.exponent + semantics->precision
2455 - (calcSemantics.precision - excessPrecision));
2456 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2459 return normalize(rounding_mode, calcLostFraction);
2465 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2470 /* Scan the text. */
2471 StringRef::iterator p = str.begin();
2472 interpretDecimal(p, str.end(), &D);
2474 /* Handle the quick cases. First the case of no significant digits,
2475 i.e. zero, and then exponents that are obviously too large or too
2476 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2477 definitely overflows if
2479 (exp - 1) * L >= maxExponent
2481 and definitely underflows to zero where
2483 (exp + 1) * L <= minExponent - precision
2485 With integer arithmetic the tightest bounds for L are
2487 93/28 < L < 196/59 [ numerator <= 256 ]
2488 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2491 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2495 /* Check whether the normalized exponent is high enough to overflow
2496 max during the log-rebasing in the max-exponent check below. */
2497 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2498 fs = handleOverflow(rounding_mode);
2500 /* If it wasn't, then it also wasn't high enough to overflow max
2501 during the log-rebasing in the min-exponent check. Check that it
2502 won't overflow min in either check, then perform the min-exponent
2504 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2505 (D.normalizedExponent + 1) * 28738 <=
2506 8651 * (semantics->minExponent - (int) semantics->precision)) {
2507 /* Underflow to zero and round. */
2509 fs = normalize(rounding_mode, lfLessThanHalf);
2511 /* We can finally safely perform the max-exponent check. */
2512 } else if ((D.normalizedExponent - 1) * 42039
2513 >= 12655 * semantics->maxExponent) {
2514 /* Overflow and round. */
2515 fs = handleOverflow(rounding_mode);
2517 integerPart *decSignificand;
2518 unsigned int partCount;
2520 /* A tight upper bound on number of bits required to hold an
2521 N-digit decimal integer is N * 196 / 59. Allocate enough space
2522 to hold the full significand, and an extra part required by
2524 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2525 partCount = partCountForBits(1 + 196 * partCount / 59);
2526 decSignificand = new integerPart[partCount + 1];
2529 /* Convert to binary efficiently - we do almost all multiplication
2530 in an integerPart. When this would overflow do we do a single
2531 bignum multiplication, and then revert again to multiplication
2532 in an integerPart. */
2534 integerPart decValue, val, multiplier;
2542 if (p == str.end()) {
2546 decValue = decDigitValue(*p++);
2547 assert(decValue < 10U && "Invalid character in significand");
2549 val = val * 10 + decValue;
2550 /* The maximum number that can be multiplied by ten with any
2551 digit added without overflowing an integerPart. */
2552 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2554 /* Multiply out the current part. */
2555 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2556 partCount, partCount + 1, false);
2558 /* If we used another part (likely but not guaranteed), increase
2560 if (decSignificand[partCount])
2562 } while (p <= D.lastSigDigit);
2564 category = fcNormal;
2565 fs = roundSignificandWithExponent(decSignificand, partCount,
2566 D.exponent, rounding_mode);
2568 delete [] decSignificand;
2575 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2577 assert(!str.empty() && "Invalid string length");
2579 /* Handle a leading minus sign. */
2580 StringRef::iterator p = str.begin();
2581 size_t slen = str.size();
2582 sign = *p == '-' ? 1 : 0;
2583 if (*p == '-' || *p == '+') {
2586 assert(slen && "String has no digits");
2589 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2590 assert(slen - 2 && "Invalid string");
2591 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2595 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2598 /* Write out a hexadecimal representation of the floating point value
2599 to DST, which must be of sufficient size, in the C99 form
2600 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2601 excluding the terminating NUL.
2603 If UPPERCASE, the output is in upper case, otherwise in lower case.
2605 HEXDIGITS digits appear altogether, rounding the value if
2606 necessary. If HEXDIGITS is 0, the minimal precision to display the
2607 number precisely is used instead. If nothing would appear after
2608 the decimal point it is suppressed.
2610 The decimal exponent is always printed and has at least one digit.
2611 Zero values display an exponent of zero. Infinities and NaNs
2612 appear as "infinity" or "nan" respectively.
2614 The above rules are as specified by C99. There is ambiguity about
2615 what the leading hexadecimal digit should be. This implementation
2616 uses whatever is necessary so that the exponent is displayed as
2617 stored. This implies the exponent will fall within the IEEE format
2618 range, and the leading hexadecimal digit will be 0 (for denormals),
2619 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2620 any other digits zero).
2623 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2624 bool upperCase, roundingMode rounding_mode) const
2634 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2635 dst += sizeof infinityL - 1;
2639 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2640 dst += sizeof NaNU - 1;
2645 *dst++ = upperCase ? 'X': 'x';
2647 if (hexDigits > 1) {
2649 memset (dst, '0', hexDigits - 1);
2650 dst += hexDigits - 1;
2652 *dst++ = upperCase ? 'P': 'p';
2657 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2663 return static_cast<unsigned int>(dst - p);
2666 /* Does the hard work of outputting the correctly rounded hexadecimal
2667 form of a normal floating point number with the specified number of
2668 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2669 digits necessary to print the value precisely is output. */
2671 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2673 roundingMode rounding_mode) const
2675 unsigned int count, valueBits, shift, partsCount, outputDigits;
2676 const char *hexDigitChars;
2677 const integerPart *significand;
2682 *dst++ = upperCase ? 'X': 'x';
2685 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2687 significand = significandParts();
2688 partsCount = partCount();
2690 /* +3 because the first digit only uses the single integer bit, so
2691 we have 3 virtual zero most-significant-bits. */
2692 valueBits = semantics->precision + 3;
2693 shift = integerPartWidth - valueBits % integerPartWidth;
2695 /* The natural number of digits required ignoring trailing
2696 insignificant zeroes. */
2697 outputDigits = (valueBits - significandLSB () + 3) / 4;
2699 /* hexDigits of zero means use the required number for the
2700 precision. Otherwise, see if we are truncating. If we are,
2701 find out if we need to round away from zero. */
2703 if (hexDigits < outputDigits) {
2704 /* We are dropping non-zero bits, so need to check how to round.
2705 "bits" is the number of dropped bits. */
2707 lostFraction fraction;
2709 bits = valueBits - hexDigits * 4;
2710 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2711 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2713 outputDigits = hexDigits;
2716 /* Write the digits consecutively, and start writing in the location
2717 of the hexadecimal point. We move the most significant digit
2718 left and add the hexadecimal point later. */
2721 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2723 while (outputDigits && count) {
2726 /* Put the most significant integerPartWidth bits in "part". */
2727 if (--count == partsCount)
2728 part = 0; /* An imaginary higher zero part. */
2730 part = significand[count] << shift;
2733 part |= significand[count - 1] >> (integerPartWidth - shift);
2735 /* Convert as much of "part" to hexdigits as we can. */
2736 unsigned int curDigits = integerPartWidth / 4;
2738 if (curDigits > outputDigits)
2739 curDigits = outputDigits;
2740 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2741 outputDigits -= curDigits;
2747 /* Note that hexDigitChars has a trailing '0'. */
2750 *q = hexDigitChars[hexDigitValue (*q) + 1];
2751 } while (*q == '0');
2754 /* Add trailing zeroes. */
2755 memset (dst, '0', outputDigits);
2756 dst += outputDigits;
2759 /* Move the most significant digit to before the point, and if there
2760 is something after the decimal point add it. This must come
2761 after rounding above. */
2768 /* Finally output the exponent. */
2769 *dst++ = upperCase ? 'P': 'p';
2771 return writeSignedDecimal (dst, exponent);
2774 hash_code llvm::hash_value(const APFloat &Arg) {
2775 if (Arg.category != APFloat::fcNormal)
2776 return hash_combine((uint8_t)Arg.category,
2777 // NaN has no sign, fix it at zero.
2778 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2779 Arg.semantics->precision);
2781 // Normal floats need their exponent and significand hashed.
2782 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2783 Arg.semantics->precision, Arg.exponent,
2785 Arg.significandParts(),
2786 Arg.significandParts() + Arg.partCount()));
2789 // Conversion from APFloat to/from host float/double. It may eventually be
2790 // possible to eliminate these and have everybody deal with APFloats, but that
2791 // will take a while. This approach will not easily extend to long double.
2792 // Current implementation requires integerPartWidth==64, which is correct at
2793 // the moment but could be made more general.
2795 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2796 // the actual IEEE respresentations. We compensate for that here.
2799 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2801 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2802 assert(partCount()==2);
2804 uint64_t myexponent, mysignificand;
2806 if (category==fcNormal) {
2807 myexponent = exponent+16383; //bias
2808 mysignificand = significandParts()[0];
2809 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2810 myexponent = 0; // denormal
2811 } else if (category==fcZero) {
2814 } else if (category==fcInfinity) {
2815 myexponent = 0x7fff;
2816 mysignificand = 0x8000000000000000ULL;
2818 assert(category == fcNaN && "Unknown category");
2819 myexponent = 0x7fff;
2820 mysignificand = significandParts()[0];
2824 words[0] = mysignificand;
2825 words[1] = ((uint64_t)(sign & 1) << 15) |
2826 (myexponent & 0x7fffLL);
2827 return APInt(80, words);
2831 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2833 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2834 assert(partCount()==2);
2840 // Convert number to double. To avoid spurious underflows, we re-
2841 // normalize against the "double" minExponent first, and only *then*
2842 // truncate the mantissa. The result of that second conversion
2843 // may be inexact, but should never underflow.
2844 // Declare fltSemantics before APFloat that uses it (and
2845 // saves pointer to it) to ensure correct destruction order.
2846 fltSemantics extendedSemantics = *semantics;
2847 extendedSemantics.minExponent = IEEEdouble.minExponent;
2848 APFloat extended(*this);
2849 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2850 assert(fs == opOK && !losesInfo);
2853 APFloat u(extended);
2854 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2855 assert(fs == opOK || fs == opInexact);
2857 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2859 // If conversion was exact or resulted in a special case, we're done;
2860 // just set the second double to zero. Otherwise, re-convert back to
2861 // the extended format and compute the difference. This now should
2862 // convert exactly to double.
2863 if (u.category == fcNormal && losesInfo) {
2864 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2865 assert(fs == opOK && !losesInfo);
2868 APFloat v(extended);
2869 v.subtract(u, rmNearestTiesToEven);
2870 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2871 assert(fs == opOK && !losesInfo);
2873 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2878 return APInt(128, words);
2882 APFloat::convertQuadrupleAPFloatToAPInt() const
2884 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2885 assert(partCount()==2);
2887 uint64_t myexponent, mysignificand, mysignificand2;
2889 if (category==fcNormal) {
2890 myexponent = exponent+16383; //bias
2891 mysignificand = significandParts()[0];
2892 mysignificand2 = significandParts()[1];
2893 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2894 myexponent = 0; // denormal
2895 } else if (category==fcZero) {
2897 mysignificand = mysignificand2 = 0;
2898 } else if (category==fcInfinity) {
2899 myexponent = 0x7fff;
2900 mysignificand = mysignificand2 = 0;
2902 assert(category == fcNaN && "Unknown category!");
2903 myexponent = 0x7fff;
2904 mysignificand = significandParts()[0];
2905 mysignificand2 = significandParts()[1];
2909 words[0] = mysignificand;
2910 words[1] = ((uint64_t)(sign & 1) << 63) |
2911 ((myexponent & 0x7fff) << 48) |
2912 (mysignificand2 & 0xffffffffffffLL);
2914 return APInt(128, words);
2918 APFloat::convertDoubleAPFloatToAPInt() const
2920 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2921 assert(partCount()==1);
2923 uint64_t myexponent, mysignificand;
2925 if (category==fcNormal) {
2926 myexponent = exponent+1023; //bias
2927 mysignificand = *significandParts();
2928 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2929 myexponent = 0; // denormal
2930 } else if (category==fcZero) {
2933 } else if (category==fcInfinity) {
2937 assert(category == fcNaN && "Unknown category!");
2939 mysignificand = *significandParts();
2942 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2943 ((myexponent & 0x7ff) << 52) |
2944 (mysignificand & 0xfffffffffffffLL))));
2948 APFloat::convertFloatAPFloatToAPInt() const
2950 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2951 assert(partCount()==1);
2953 uint32_t myexponent, mysignificand;
2955 if (category==fcNormal) {
2956 myexponent = exponent+127; //bias
2957 mysignificand = (uint32_t)*significandParts();
2958 if (myexponent == 1 && !(mysignificand & 0x800000))
2959 myexponent = 0; // denormal
2960 } else if (category==fcZero) {
2963 } else if (category==fcInfinity) {
2967 assert(category == fcNaN && "Unknown category!");
2969 mysignificand = (uint32_t)*significandParts();
2972 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2973 (mysignificand & 0x7fffff)));
2977 APFloat::convertHalfAPFloatToAPInt() const
2979 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2980 assert(partCount()==1);
2982 uint32_t myexponent, mysignificand;
2984 if (category==fcNormal) {
2985 myexponent = exponent+15; //bias
2986 mysignificand = (uint32_t)*significandParts();
2987 if (myexponent == 1 && !(mysignificand & 0x400))
2988 myexponent = 0; // denormal
2989 } else if (category==fcZero) {
2992 } else if (category==fcInfinity) {
2996 assert(category == fcNaN && "Unknown category!");
2998 mysignificand = (uint32_t)*significandParts();
3001 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3002 (mysignificand & 0x3ff)));
3005 // This function creates an APInt that is just a bit map of the floating
3006 // point constant as it would appear in memory. It is not a conversion,
3007 // and treating the result as a normal integer is unlikely to be useful.
3010 APFloat::bitcastToAPInt() const
3012 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3013 return convertHalfAPFloatToAPInt();
3015 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3016 return convertFloatAPFloatToAPInt();
3018 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3019 return convertDoubleAPFloatToAPInt();
3021 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3022 return convertQuadrupleAPFloatToAPInt();
3024 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3025 return convertPPCDoubleDoubleAPFloatToAPInt();
3027 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3029 return convertF80LongDoubleAPFloatToAPInt();
3033 APFloat::convertToFloat() const
3035 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3036 "Float semantics are not IEEEsingle");
3037 APInt api = bitcastToAPInt();
3038 return api.bitsToFloat();
3042 APFloat::convertToDouble() const
3044 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3045 "Float semantics are not IEEEdouble");
3046 APInt api = bitcastToAPInt();
3047 return api.bitsToDouble();
3050 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3051 /// does not support these bit patterns:
3052 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3053 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3054 /// exponent = 0, integer bit 1 ("pseudodenormal")
3055 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3056 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3058 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3060 assert(api.getBitWidth()==80);
3061 uint64_t i1 = api.getRawData()[0];
3062 uint64_t i2 = api.getRawData()[1];
3063 uint64_t myexponent = (i2 & 0x7fff);
3064 uint64_t mysignificand = i1;
3066 initialize(&APFloat::x87DoubleExtended);
3067 assert(partCount()==2);
3069 sign = static_cast<unsigned int>(i2>>15);
3070 if (myexponent==0 && mysignificand==0) {
3071 // exponent, significand meaningless
3073 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3074 // exponent, significand meaningless
3075 category = fcInfinity;
3076 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3077 // exponent meaningless
3079 significandParts()[0] = mysignificand;
3080 significandParts()[1] = 0;
3082 category = fcNormal;
3083 exponent = myexponent - 16383;
3084 significandParts()[0] = mysignificand;
3085 significandParts()[1] = 0;
3086 if (myexponent==0) // denormal
3092 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3094 assert(api.getBitWidth()==128);
3095 uint64_t i1 = api.getRawData()[0];
3096 uint64_t i2 = api.getRawData()[1];
3100 // Get the first double and convert to our format.
3101 initFromDoubleAPInt(APInt(64, i1));
3102 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3103 assert(fs == opOK && !losesInfo);
3106 // Unless we have a special case, add in second double.
3107 if (category == fcNormal) {
3108 APFloat v(IEEEdouble, APInt(64, i2));
3109 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3110 assert(fs == opOK && !losesInfo);
3113 add(v, rmNearestTiesToEven);
3118 APFloat::initFromQuadrupleAPInt(const APInt &api)
3120 assert(api.getBitWidth()==128);
3121 uint64_t i1 = api.getRawData()[0];
3122 uint64_t i2 = api.getRawData()[1];
3123 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3124 uint64_t mysignificand = i1;
3125 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3127 initialize(&APFloat::IEEEquad);
3128 assert(partCount()==2);
3130 sign = static_cast<unsigned int>(i2>>63);
3131 if (myexponent==0 &&
3132 (mysignificand==0 && mysignificand2==0)) {
3133 // exponent, significand meaningless
3135 } else if (myexponent==0x7fff &&
3136 (mysignificand==0 && mysignificand2==0)) {
3137 // exponent, significand meaningless
3138 category = fcInfinity;
3139 } else if (myexponent==0x7fff &&
3140 (mysignificand!=0 || mysignificand2 !=0)) {
3141 // exponent meaningless
3143 significandParts()[0] = mysignificand;
3144 significandParts()[1] = mysignificand2;
3146 category = fcNormal;
3147 exponent = myexponent - 16383;
3148 significandParts()[0] = mysignificand;
3149 significandParts()[1] = mysignificand2;
3150 if (myexponent==0) // denormal
3153 significandParts()[1] |= 0x1000000000000LL; // integer bit
3158 APFloat::initFromDoubleAPInt(const APInt &api)
3160 assert(api.getBitWidth()==64);
3161 uint64_t i = *api.getRawData();
3162 uint64_t myexponent = (i >> 52) & 0x7ff;
3163 uint64_t mysignificand = i & 0xfffffffffffffLL;
3165 initialize(&APFloat::IEEEdouble);
3166 assert(partCount()==1);
3168 sign = static_cast<unsigned int>(i>>63);
3169 if (myexponent==0 && mysignificand==0) {
3170 // exponent, significand meaningless
3172 } else if (myexponent==0x7ff && mysignificand==0) {
3173 // exponent, significand meaningless
3174 category = fcInfinity;
3175 } else if (myexponent==0x7ff && mysignificand!=0) {
3176 // exponent meaningless
3178 *significandParts() = mysignificand;
3180 category = fcNormal;
3181 exponent = myexponent - 1023;
3182 *significandParts() = mysignificand;
3183 if (myexponent==0) // denormal
3186 *significandParts() |= 0x10000000000000LL; // integer bit
3191 APFloat::initFromFloatAPInt(const APInt & api)
3193 assert(api.getBitWidth()==32);
3194 uint32_t i = (uint32_t)*api.getRawData();
3195 uint32_t myexponent = (i >> 23) & 0xff;
3196 uint32_t mysignificand = i & 0x7fffff;
3198 initialize(&APFloat::IEEEsingle);
3199 assert(partCount()==1);
3202 if (myexponent==0 && mysignificand==0) {
3203 // exponent, significand meaningless
3205 } else if (myexponent==0xff && mysignificand==0) {
3206 // exponent, significand meaningless
3207 category = fcInfinity;
3208 } else if (myexponent==0xff && mysignificand!=0) {
3209 // sign, exponent, significand meaningless
3211 *significandParts() = mysignificand;
3213 category = fcNormal;
3214 exponent = myexponent - 127; //bias
3215 *significandParts() = mysignificand;
3216 if (myexponent==0) // denormal
3219 *significandParts() |= 0x800000; // integer bit
3224 APFloat::initFromHalfAPInt(const APInt & api)
3226 assert(api.getBitWidth()==16);
3227 uint32_t i = (uint32_t)*api.getRawData();
3228 uint32_t myexponent = (i >> 10) & 0x1f;
3229 uint32_t mysignificand = i & 0x3ff;
3231 initialize(&APFloat::IEEEhalf);
3232 assert(partCount()==1);
3235 if (myexponent==0 && mysignificand==0) {
3236 // exponent, significand meaningless
3238 } else if (myexponent==0x1f && mysignificand==0) {
3239 // exponent, significand meaningless
3240 category = fcInfinity;
3241 } else if (myexponent==0x1f && mysignificand!=0) {
3242 // sign, exponent, significand meaningless
3244 *significandParts() = mysignificand;
3246 category = fcNormal;
3247 exponent = myexponent - 15; //bias
3248 *significandParts() = mysignificand;
3249 if (myexponent==0) // denormal
3252 *significandParts() |= 0x400; // integer bit
3256 /// Treat api as containing the bits of a floating point number. Currently
3257 /// we infer the floating point type from the size of the APInt. The
3258 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3259 /// when the size is anything else).
3261 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3263 if (Sem == &IEEEhalf)
3264 return initFromHalfAPInt(api);
3265 if (Sem == &IEEEsingle)
3266 return initFromFloatAPInt(api);
3267 if (Sem == &IEEEdouble)
3268 return initFromDoubleAPInt(api);
3269 if (Sem == &x87DoubleExtended)
3270 return initFromF80LongDoubleAPInt(api);
3271 if (Sem == &IEEEquad)
3272 return initFromQuadrupleAPInt(api);
3273 if (Sem == &PPCDoubleDouble)
3274 return initFromPPCDoubleDoubleAPInt(api);
3276 llvm_unreachable(0);
3280 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3284 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3286 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3288 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3290 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3293 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3294 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3296 llvm_unreachable("Unknown floating bit width");
3300 /// Make this number the largest magnitude normal number in the given
3302 void APFloat::makeLargest(bool Negative) {
3303 // We want (in interchange format):
3304 // sign = {Negative}
3306 // significand = 1..1
3307 category = fcNormal;
3309 exponent = semantics->maxExponent;
3311 // Use memset to set all but the highest integerPart to all ones.
3312 integerPart *significand = significandParts();
3313 unsigned PartCount = partCount();
3314 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3316 // Set the high integerPart especially setting all unused top bits for
3317 // internal consistency.
3318 const unsigned NumUnusedHighBits =
3319 PartCount*integerPartWidth - semantics->precision;
3320 significand[PartCount - 1] = ~integerPart(0) >> NumUnusedHighBits;
3323 /// Make this number the smallest magnitude denormal number in the given
3325 void APFloat::makeSmallest(bool Negative) {
3326 // We want (in interchange format):
3327 // sign = {Negative}
3329 // significand = 0..01
3330 category = fcNormal;
3332 exponent = semantics->minExponent;
3333 APInt::tcSet(significandParts(), 1, partCount());
3337 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3338 // We want (in interchange format):
3339 // sign = {Negative}
3341 // significand = 1..1
3342 APFloat Val(Sem, uninitialized);
3343 Val.makeLargest(Negative);
3347 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3348 // We want (in interchange format):
3349 // sign = {Negative}
3351 // significand = 0..01
3352 APFloat Val(Sem, uninitialized);
3353 Val.makeSmallest(Negative);
3357 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3358 APFloat Val(Sem, fcNormal, Negative);
3360 // We want (in interchange format):
3361 // sign = {Negative}
3363 // significand = 10..0
3365 Val.exponent = Sem.minExponent;
3366 Val.zeroSignificand();
3367 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3368 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3373 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3374 initFromAPInt(&Sem, API);
3377 APFloat::APFloat(float f) {
3378 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3381 APFloat::APFloat(double d) {
3382 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3386 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3387 Buffer.append(Str.begin(), Str.end());
3390 /// Removes data from the given significand until it is no more
3391 /// precise than is required for the desired precision.
3392 void AdjustToPrecision(APInt &significand,
3393 int &exp, unsigned FormatPrecision) {
3394 unsigned bits = significand.getActiveBits();
3396 // 196/59 is a very slight overestimate of lg_2(10).
3397 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3399 if (bits <= bitsRequired) return;
3401 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3402 if (!tensRemovable) return;
3404 exp += tensRemovable;
3406 APInt divisor(significand.getBitWidth(), 1);
3407 APInt powten(significand.getBitWidth(), 10);
3409 if (tensRemovable & 1)
3411 tensRemovable >>= 1;
3412 if (!tensRemovable) break;
3416 significand = significand.udiv(divisor);
3418 // Truncate the significand down to its active bit count.
3419 significand = significand.trunc(significand.getActiveBits());
3423 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3424 int &exp, unsigned FormatPrecision) {
3425 unsigned N = buffer.size();
3426 if (N <= FormatPrecision) return;
3428 // The most significant figures are the last ones in the buffer.
3429 unsigned FirstSignificant = N - FormatPrecision;
3432 // FIXME: this probably shouldn't use 'round half up'.
3434 // Rounding down is just a truncation, except we also want to drop
3435 // trailing zeros from the new result.
3436 if (buffer[FirstSignificant - 1] < '5') {
3437 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3440 exp += FirstSignificant;
3441 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3445 // Rounding up requires a decimal add-with-carry. If we continue
3446 // the carry, the newly-introduced zeros will just be truncated.
3447 for (unsigned I = FirstSignificant; I != N; ++I) {
3448 if (buffer[I] == '9') {
3456 // If we carried through, we have exactly one digit of precision.
3457 if (FirstSignificant == N) {
3458 exp += FirstSignificant;
3460 buffer.push_back('1');
3464 exp += FirstSignificant;
3465 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3469 void APFloat::toString(SmallVectorImpl<char> &Str,
3470 unsigned FormatPrecision,
3471 unsigned FormatMaxPadding) const {
3475 return append(Str, "-Inf");
3477 return append(Str, "+Inf");
3479 case fcNaN: return append(Str, "NaN");
3485 if (!FormatMaxPadding)
3486 append(Str, "0.0E+0");
3498 // Decompose the number into an APInt and an exponent.
3499 int exp = exponent - ((int) semantics->precision - 1);
3500 APInt significand(semantics->precision,
3501 makeArrayRef(significandParts(),
3502 partCountForBits(semantics->precision)));
3504 // Set FormatPrecision if zero. We want to do this before we
3505 // truncate trailing zeros, as those are part of the precision.
3506 if (!FormatPrecision) {
3507 // It's an interesting question whether to use the nominal
3508 // precision or the active precision here for denormals.
3510 // FormatPrecision = ceil(significandBits / lg_2(10))
3511 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3514 // Ignore trailing binary zeros.
3515 int trailingZeros = significand.countTrailingZeros();
3516 exp += trailingZeros;
3517 significand = significand.lshr(trailingZeros);
3519 // Change the exponent from 2^e to 10^e.
3522 } else if (exp > 0) {
3524 significand = significand.zext(semantics->precision + exp);
3525 significand <<= exp;
3527 } else { /* exp < 0 */
3530 // We transform this using the identity:
3531 // (N)(2^-e) == (N)(5^e)(10^-e)
3532 // This means we have to multiply N (the significand) by 5^e.
3533 // To avoid overflow, we have to operate on numbers large
3534 // enough to store N * 5^e:
3535 // log2(N * 5^e) == log2(N) + e * log2(5)
3536 // <= semantics->precision + e * 137 / 59
3537 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3539 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3541 // Multiply significand by 5^e.
3542 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3543 significand = significand.zext(precision);
3544 APInt five_to_the_i(precision, 5);
3546 if (texp & 1) significand *= five_to_the_i;
3550 five_to_the_i *= five_to_the_i;
3554 AdjustToPrecision(significand, exp, FormatPrecision);
3556 SmallVector<char, 256> buffer;
3559 unsigned precision = significand.getBitWidth();
3560 APInt ten(precision, 10);
3561 APInt digit(precision, 0);
3563 bool inTrail = true;
3564 while (significand != 0) {
3565 // digit <- significand % 10
3566 // significand <- significand / 10
3567 APInt::udivrem(significand, ten, significand, digit);
3569 unsigned d = digit.getZExtValue();
3571 // Drop trailing zeros.
3572 if (inTrail && !d) exp++;
3574 buffer.push_back((char) ('0' + d));
3579 assert(!buffer.empty() && "no characters in buffer!");
3581 // Drop down to FormatPrecision.
3582 // TODO: don't do more precise calculations above than are required.
3583 AdjustToPrecision(buffer, exp, FormatPrecision);
3585 unsigned NDigits = buffer.size();
3587 // Check whether we should use scientific notation.
3588 bool FormatScientific;
3589 if (!FormatMaxPadding)
3590 FormatScientific = true;
3595 // But we shouldn't make the number look more precise than it is.
3596 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3597 NDigits + (unsigned) exp > FormatPrecision);
3599 // Power of the most significant digit.
3600 int MSD = exp + (int) (NDigits - 1);
3603 FormatScientific = false;
3605 // 765e-5 == 0.00765
3607 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3612 // Scientific formatting is pretty straightforward.
3613 if (FormatScientific) {
3614 exp += (NDigits - 1);
3616 Str.push_back(buffer[NDigits-1]);
3621 for (unsigned I = 1; I != NDigits; ++I)
3622 Str.push_back(buffer[NDigits-1-I]);
3625 Str.push_back(exp >= 0 ? '+' : '-');
3626 if (exp < 0) exp = -exp;
3627 SmallVector<char, 6> expbuf;
3629 expbuf.push_back((char) ('0' + (exp % 10)));
3632 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3633 Str.push_back(expbuf[E-1-I]);
3637 // Non-scientific, positive exponents.
3639 for (unsigned I = 0; I != NDigits; ++I)
3640 Str.push_back(buffer[NDigits-1-I]);
3641 for (unsigned I = 0; I != (unsigned) exp; ++I)
3646 // Non-scientific, negative exponents.
3648 // The number of digits to the left of the decimal point.
3649 int NWholeDigits = exp + (int) NDigits;
3652 if (NWholeDigits > 0) {
3653 for (; I != (unsigned) NWholeDigits; ++I)
3654 Str.push_back(buffer[NDigits-I-1]);
3657 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3661 for (unsigned Z = 1; Z != NZeros; ++Z)
3665 for (; I != NDigits; ++I)
3666 Str.push_back(buffer[NDigits-I-1]);
3669 bool APFloat::getExactInverse(APFloat *inv) const {
3670 // Special floats and denormals have no exact inverse.
3671 if (category != fcNormal)
3674 // Check that the number is a power of two by making sure that only the
3675 // integer bit is set in the significand.
3676 if (significandLSB() != semantics->precision - 1)
3680 APFloat reciprocal(*semantics, 1ULL);
3681 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3684 // Avoid multiplication with a denormal, it is not safe on all platforms and
3685 // may be slower than a normal division.
3686 if (reciprocal.isDenormal())
3689 assert(reciprocal.category == fcNormal &&
3690 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3698 bool APFloat::isSignaling() const {
3702 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3703 // first bit of the trailing significand being 0.
3704 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3707 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3709 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3710 /// appropriate sign switching before/after the computation.
3711 APFloat::opStatus APFloat::next(bool nextDown) {
3712 // If we are performing nextDown, swap sign so we have -x.
3716 // Compute nextUp(x)
3717 opStatus result = opOK;
3719 // Handle each float category separately.
3722 // nextUp(+inf) = +inf
3725 // nextUp(-inf) = -getLargest()
3729 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3730 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3731 // change the payload.
3732 if (isSignaling()) {
3733 result = opInvalidOp;
3734 // For consistency, propogate the sign of the sNaN to the qNaN.
3735 makeNaN(false, isNegative(), 0);
3739 // nextUp(pm 0) = +getSmallest()
3740 makeSmallest(false);
3743 // nextUp(-getSmallest()) = -0
3744 if (isSmallest() && isNegative()) {
3745 APInt::tcSet(significandParts(), 0, partCount());
3751 // nextUp(getLargest()) == INFINITY
3752 if (isLargest() && !isNegative()) {
3753 APInt::tcSet(significandParts(), 0, partCount());
3754 category = fcInfinity;
3755 exponent = semantics->maxExponent + 1;
3759 // nextUp(normal) == normal + inc.
3761 // If we are negative, we need to decrement the significand.
3763 // We only cross a binade boundary that requires adjusting the exponent
3765 // 1. exponent != semantics->minExponent. This implies we are not in the
3766 // smallest binade or are dealing with denormals.
3767 // 2. Our significand excluding the integral bit is all zeros.
3768 bool WillCrossBinadeBoundary =
3769 exponent != semantics->minExponent && isSignificandAllZeros();
3771 // Decrement the significand.
3773 // We always do this since:
3774 // 1. If we are dealing with a non binade decrement, by definition we
3775 // just decrement the significand.
3776 // 2. If we are dealing with a normal -> normal binade decrement, since
3777 // we have an explicit integral bit the fact that all bits but the
3778 // integral bit are zero implies that subtracting one will yield a
3779 // significand with 0 integral bit and 1 in all other spots. Thus we
3780 // must just adjust the exponent and set the integral bit to 1.
3781 // 3. If we are dealing with a normal -> denormal binade decrement,
3782 // since we set the integral bit to 0 when we represent denormals, we
3783 // just decrement the significand.
3784 integerPart *Parts = significandParts();
3785 APInt::tcDecrement(Parts, partCount());
3787 if (WillCrossBinadeBoundary) {
3788 // Our result is a normal number. Do the following:
3789 // 1. Set the integral bit to 1.
3790 // 2. Decrement the exponent.
3791 APInt::tcSetBit(Parts, semantics->precision - 1);
3795 // If we are positive, we need to increment the significand.
3797 // We only cross a binade boundary that requires adjusting the exponent if
3798 // the input is not a denormal and all of said input's significand bits
3799 // are set. If all of said conditions are true: clear the significand, set
3800 // the integral bit to 1, and increment the exponent. If we have a
3801 // denormal always increment since moving denormals and the numbers in the
3802 // smallest normal binade have the same exponent in our representation.
3803 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3805 if (WillCrossBinadeBoundary) {
3806 integerPart *Parts = significandParts();
3807 APInt::tcSet(Parts, 0, partCount());
3808 APInt::tcSetBit(Parts, semantics->precision - 1);
3809 assert(exponent != semantics->maxExponent &&
3810 "We can not increment an exponent beyond the maxExponent allowed"
3811 " by the given floating point semantics.");
3814 incrementSignificand();
3820 // If we are performing nextDown, swap sign so we have -nextUp(-x)