1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MathExtras.h"
27 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29 /* Assumed in hexadecimal significand parsing, and conversion to
30 hexadecimal strings. */
31 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
32 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
36 /* Represents floating point arithmetic semantics. */
38 /* The largest E such that 2^E is representable; this matches the
39 definition of IEEE 754. */
40 exponent_t maxExponent;
42 /* The smallest E such that 2^E is a normalized number; this
43 matches the definition of IEEE 754. */
44 exponent_t minExponent;
46 /* Number of bits in the significand. This includes the integer
48 unsigned int precision;
51 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
52 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
53 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
54 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
55 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
56 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
58 /* The PowerPC format consists of two doubles. It does not map cleanly
59 onto the usual format above. It is approximated using twice the
60 mantissa bits. Note that for exponents near the double minimum,
61 we no longer can represent the full 106 mantissa bits, so those
62 will be treated as denormal numbers.
64 FIXME: While this approximation is equivalent to what GCC uses for
65 compile-time arithmetic on PPC double-double numbers, it is not able
66 to represent all possible values held by a PPC double-double number,
67 for example: (long double) 1.0 + (long double) 0x1p-106
68 Should this be replaced by a full emulation of PPC double-double? */
69 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
71 /* A tight upper bound on number of parts required to hold the value
74 power * 815 / (351 * integerPartWidth) + 1
76 However, whilst the result may require only this many parts,
77 because we are multiplying two values to get it, the
78 multiplication may require an extra part with the excess part
79 being zero (consider the trivial case of 1 * 1, tcFullMultiply
80 requires two parts to hold the single-part result). So we add an
81 extra one to guarantee enough space whilst multiplying. */
82 const unsigned int maxExponent = 16383;
83 const unsigned int maxPrecision = 113;
84 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
85 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
86 / (351 * integerPartWidth));
89 /* A bunch of private, handy routines. */
91 static inline unsigned int
92 partCountForBits(unsigned int bits)
94 return ((bits) + integerPartWidth - 1) / integerPartWidth;
97 /* Returns 0U-9U. Return values >= 10U are not digits. */
98 static inline unsigned int
99 decDigitValue(unsigned int c)
105 hexDigitValue(unsigned int c)
124 /* Return the value of a decimal exponent of the form
127 If the exponent overflows, returns a large exponent with the
130 readExponent(StringRef::iterator begin, StringRef::iterator end)
133 unsigned int absExponent;
134 const unsigned int overlargeExponent = 24000; /* FIXME. */
135 StringRef::iterator p = begin;
137 assert(p != end && "Exponent has no digits");
139 isNegative = (*p == '-');
140 if (*p == '-' || *p == '+') {
142 assert(p != end && "Exponent has no digits");
145 absExponent = decDigitValue(*p++);
146 assert(absExponent < 10U && "Invalid character in exponent");
148 for (; p != end; ++p) {
151 value = decDigitValue(*p);
152 assert(value < 10U && "Invalid character in exponent");
154 value += absExponent * 10;
155 if (absExponent >= overlargeExponent) {
156 absExponent = overlargeExponent;
157 p = end; /* outwit assert below */
163 assert(p == end && "Invalid exponent in exponent");
166 return -(int) absExponent;
168 return (int) absExponent;
171 /* This is ugly and needs cleaning up, but I don't immediately see
172 how whilst remaining safe. */
174 totalExponent(StringRef::iterator p, StringRef::iterator end,
175 int exponentAdjustment)
177 int unsignedExponent;
178 bool negative, overflow;
181 assert(p != end && "Exponent has no digits");
183 negative = *p == '-';
184 if (*p == '-' || *p == '+') {
186 assert(p != end && "Exponent has no digits");
189 unsignedExponent = 0;
191 for (; p != end; ++p) {
194 value = decDigitValue(*p);
195 assert(value < 10U && "Invalid character in exponent");
197 unsignedExponent = unsignedExponent * 10 + value;
198 if (unsignedExponent > 32767) {
204 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
208 exponent = unsignedExponent;
210 exponent = -exponent;
211 exponent += exponentAdjustment;
212 if (exponent > 32767 || exponent < -32768)
217 exponent = negative ? -32768: 32767;
222 static StringRef::iterator
223 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
224 StringRef::iterator *dot)
226 StringRef::iterator p = begin;
228 while (*p == '0' && p != end)
234 assert(end - begin != 1 && "Significand has no digits");
236 while (*p == '0' && p != end)
243 /* Given a normal decimal floating point number of the form
247 where the decimal point and exponent are optional, fill out the
248 structure D. Exponent is appropriate if the significand is
249 treated as an integer, and normalizedExponent if the significand
250 is taken to have the decimal point after a single leading
253 If the value is zero, V->firstSigDigit points to a non-digit, and
254 the return exponent is zero.
257 const char *firstSigDigit;
258 const char *lastSigDigit;
260 int normalizedExponent;
264 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
267 StringRef::iterator dot = end;
268 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
270 D->firstSigDigit = p;
272 D->normalizedExponent = 0;
274 for (; p != end; ++p) {
276 assert(dot == end && "String contains multiple dots");
281 if (decDigitValue(*p) >= 10U)
286 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
287 assert(p != begin && "Significand has no digits");
288 assert((dot == end || p - begin != 1) && "Significand has no digits");
290 /* p points to the first non-digit in the string */
291 D->exponent = readExponent(p + 1, end);
293 /* Implied decimal point? */
298 /* If number is all zeroes accept any exponent. */
299 if (p != D->firstSigDigit) {
300 /* Drop insignificant trailing zeroes. */
305 while (p != begin && *p == '0');
306 while (p != begin && *p == '.');
309 /* Adjust the exponents for any decimal point. */
310 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
311 D->normalizedExponent = (D->exponent +
312 static_cast<exponent_t>((p - D->firstSigDigit)
313 - (dot > D->firstSigDigit && dot < p)));
319 /* Return the trailing fraction of a hexadecimal number.
320 DIGITVALUE is the first hex digit of the fraction, P points to
323 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
324 unsigned int digitValue)
326 unsigned int hexDigit;
328 /* If the first trailing digit isn't 0 or 8 we can work out the
329 fraction immediately. */
331 return lfMoreThanHalf;
332 else if (digitValue < 8 && digitValue > 0)
333 return lfLessThanHalf;
335 /* Otherwise we need to find the first non-zero digit. */
339 assert(p != end && "Invalid trailing hexadecimal fraction!");
341 hexDigit = hexDigitValue(*p);
343 /* If we ran off the end it is exactly zero or one-half, otherwise
346 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
348 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
351 /* Return the fraction lost were a bignum truncated losing the least
352 significant BITS bits. */
354 lostFractionThroughTruncation(const integerPart *parts,
355 unsigned int partCount,
360 lsb = APInt::tcLSB(parts, partCount);
362 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
364 return lfExactlyZero;
366 return lfExactlyHalf;
367 if (bits <= partCount * integerPartWidth &&
368 APInt::tcExtractBit(parts, bits - 1))
369 return lfMoreThanHalf;
371 return lfLessThanHalf;
374 /* Shift DST right BITS bits noting lost fraction. */
376 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
378 lostFraction lost_fraction;
380 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
382 APInt::tcShiftRight(dst, parts, bits);
384 return lost_fraction;
387 /* Combine the effect of two lost fractions. */
389 combineLostFractions(lostFraction moreSignificant,
390 lostFraction lessSignificant)
392 if (lessSignificant != lfExactlyZero) {
393 if (moreSignificant == lfExactlyZero)
394 moreSignificant = lfLessThanHalf;
395 else if (moreSignificant == lfExactlyHalf)
396 moreSignificant = lfMoreThanHalf;
399 return moreSignificant;
402 /* The error from the true value, in half-ulps, on multiplying two
403 floating point numbers, which differ from the value they
404 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
405 than the returned value.
407 See "How to Read Floating Point Numbers Accurately" by William D
410 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
412 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
414 if (HUerr1 + HUerr2 == 0)
415 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
417 return inexactMultiply + 2 * (HUerr1 + HUerr2);
420 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
421 when the least significant BITS are truncated. BITS cannot be
424 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
426 unsigned int count, partBits;
427 integerPart part, boundary;
432 count = bits / integerPartWidth;
433 partBits = bits % integerPartWidth + 1;
435 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
438 boundary = (integerPart) 1 << (partBits - 1);
443 if (part - boundary <= boundary - part)
444 return part - boundary;
446 return boundary - part;
449 if (part == boundary) {
452 return ~(integerPart) 0; /* A lot. */
455 } else if (part == boundary - 1) {
458 return ~(integerPart) 0; /* A lot. */
463 return ~(integerPart) 0; /* A lot. */
466 /* Place pow(5, power) in DST, and return the number of parts used.
467 DST must be at least one part larger than size of the answer. */
469 powerOf5(integerPart *dst, unsigned int power)
471 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
473 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
474 pow5s[0] = 78125 * 5;
476 unsigned int partsCount[16] = { 1 };
477 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
479 assert(power <= maxExponent);
484 *p1 = firstEightPowers[power & 7];
490 for (unsigned int n = 0; power; power >>= 1, n++) {
495 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
497 pc = partsCount[n - 1];
498 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
500 if (pow5[pc - 1] == 0)
508 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
510 if (p2[result - 1] == 0)
513 /* Now result is in p1 with partsCount parts and p2 is scratch
515 tmp = p1, p1 = p2, p2 = tmp;
522 APInt::tcAssign(dst, p1, result);
527 /* Zero at the end to avoid modular arithmetic when adding one; used
528 when rounding up during hexadecimal output. */
529 static const char hexDigitsLower[] = "0123456789abcdef0";
530 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
531 static const char infinityL[] = "infinity";
532 static const char infinityU[] = "INFINITY";
533 static const char NaNL[] = "nan";
534 static const char NaNU[] = "NAN";
536 /* Write out an integerPart in hexadecimal, starting with the most
537 significant nibble. Write out exactly COUNT hexdigits, return
540 partAsHex (char *dst, integerPart part, unsigned int count,
541 const char *hexDigitChars)
543 unsigned int result = count;
545 assert(count != 0 && count <= integerPartWidth / 4);
547 part >>= (integerPartWidth - 4 * count);
549 dst[count] = hexDigitChars[part & 0xf];
556 /* Write out an unsigned decimal integer. */
558 writeUnsignedDecimal (char *dst, unsigned int n)
574 /* Write out a signed decimal integer. */
576 writeSignedDecimal (char *dst, int value)
580 dst = writeUnsignedDecimal(dst, -(unsigned) value);
582 dst = writeUnsignedDecimal(dst, value);
589 APFloat::initialize(const fltSemantics *ourSemantics)
593 semantics = ourSemantics;
596 significand.parts = new integerPart[count];
600 APFloat::freeSignificand()
603 delete [] significand.parts;
607 APFloat::assign(const APFloat &rhs)
609 assert(semantics == rhs.semantics);
612 category = rhs.category;
613 exponent = rhs.exponent;
614 if (category == fcNormal || category == fcNaN)
615 copySignificand(rhs);
619 APFloat::copySignificand(const APFloat &rhs)
621 assert(category == fcNormal || category == fcNaN);
622 assert(rhs.partCount() >= partCount());
624 APInt::tcAssign(significandParts(), rhs.significandParts(),
628 /* Make this number a NaN, with an arbitrary but deterministic value
629 for the significand. If double or longer, this is a signalling NaN,
630 which may not be ideal. If float, this is QNaN(0). */
631 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
636 integerPart *significand = significandParts();
637 unsigned numParts = partCount();
639 // Set the significand bits to the fill.
640 if (!fill || fill->getNumWords() < numParts)
641 APInt::tcSet(significand, 0, numParts);
643 APInt::tcAssign(significand, fill->getRawData(),
644 std::min(fill->getNumWords(), numParts));
646 // Zero out the excess bits of the significand.
647 unsigned bitsToPreserve = semantics->precision - 1;
648 unsigned part = bitsToPreserve / 64;
649 bitsToPreserve %= 64;
650 significand[part] &= ((1ULL << bitsToPreserve) - 1);
651 for (part++; part != numParts; ++part)
652 significand[part] = 0;
655 unsigned QNaNBit = semantics->precision - 2;
658 // We always have to clear the QNaN bit to make it an SNaN.
659 APInt::tcClearBit(significand, QNaNBit);
661 // If there are no bits set in the payload, we have to set
662 // *something* to make it a NaN instead of an infinity;
663 // conventionally, this is the next bit down from the QNaN bit.
664 if (APInt::tcIsZero(significand, numParts))
665 APInt::tcSetBit(significand, QNaNBit - 1);
667 // We always have to set the QNaN bit to make it a QNaN.
668 APInt::tcSetBit(significand, QNaNBit);
671 // For x87 extended precision, we want to make a NaN, not a
672 // pseudo-NaN. Maybe we should expose the ability to make
674 if (semantics == &APFloat::x87DoubleExtended)
675 APInt::tcSetBit(significand, QNaNBit + 1);
678 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
680 APFloat value(Sem, uninitialized);
681 value.makeNaN(SNaN, Negative, fill);
686 APFloat::operator=(const APFloat &rhs)
689 if (semantics != rhs.semantics) {
691 initialize(rhs.semantics);
700 APFloat::isDenormal() const {
701 return isNormal() && (exponent == semantics->minExponent) &&
702 (APInt::tcExtractBit(significandParts(),
703 semantics->precision - 1) == 0);
707 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
710 if (semantics != rhs.semantics ||
711 category != rhs.category ||
714 if (category==fcZero || category==fcInfinity)
716 else if (category==fcNormal && exponent!=rhs.exponent)
720 const integerPart* p=significandParts();
721 const integerPart* q=rhs.significandParts();
722 for (; i>0; i--, p++, q++) {
730 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
731 initialize(&ourSemantics);
734 exponent = ourSemantics.precision - 1;
735 significandParts()[0] = value;
736 normalize(rmNearestTiesToEven, lfExactlyZero);
739 APFloat::APFloat(const fltSemantics &ourSemantics) {
740 initialize(&ourSemantics);
745 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
746 // Allocates storage if necessary but does not initialize it.
747 initialize(&ourSemantics);
750 APFloat::APFloat(const fltSemantics &ourSemantics,
751 fltCategory ourCategory, bool negative) {
752 initialize(&ourSemantics);
753 category = ourCategory;
755 if (category == fcNormal)
757 else if (ourCategory == fcNaN)
761 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
762 initialize(&ourSemantics);
763 convertFromString(text, rmNearestTiesToEven);
766 APFloat::APFloat(const APFloat &rhs) {
767 initialize(rhs.semantics);
776 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
777 void APFloat::Profile(FoldingSetNodeID& ID) const {
778 ID.Add(bitcastToAPInt());
782 APFloat::partCount() const
784 return partCountForBits(semantics->precision + 1);
788 APFloat::semanticsPrecision(const fltSemantics &semantics)
790 return semantics.precision;
794 APFloat::significandParts() const
796 return const_cast<APFloat *>(this)->significandParts();
800 APFloat::significandParts()
802 assert(category == fcNormal || category == fcNaN);
805 return significand.parts;
807 return &significand.part;
811 APFloat::zeroSignificand()
814 APInt::tcSet(significandParts(), 0, partCount());
817 /* Increment an fcNormal floating point number's significand. */
819 APFloat::incrementSignificand()
823 carry = APInt::tcIncrement(significandParts(), partCount());
825 /* Our callers should never cause us to overflow. */
830 /* Add the significand of the RHS. Returns the carry flag. */
832 APFloat::addSignificand(const APFloat &rhs)
836 parts = significandParts();
838 assert(semantics == rhs.semantics);
839 assert(exponent == rhs.exponent);
841 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
844 /* Subtract the significand of the RHS with a borrow flag. Returns
847 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
851 parts = significandParts();
853 assert(semantics == rhs.semantics);
854 assert(exponent == rhs.exponent);
856 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
860 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
861 on to the full-precision result of the multiplication. Returns the
864 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
866 unsigned int omsb; // One, not zero, based MSB.
867 unsigned int partsCount, newPartsCount, precision;
868 integerPart *lhsSignificand;
869 integerPart scratch[4];
870 integerPart *fullSignificand;
871 lostFraction lost_fraction;
874 assert(semantics == rhs.semantics);
876 precision = semantics->precision;
877 newPartsCount = partCountForBits(precision * 2);
879 if (newPartsCount > 4)
880 fullSignificand = new integerPart[newPartsCount];
882 fullSignificand = scratch;
884 lhsSignificand = significandParts();
885 partsCount = partCount();
887 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
888 rhs.significandParts(), partsCount, partsCount);
890 lost_fraction = lfExactlyZero;
891 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
892 exponent += rhs.exponent;
895 Significand savedSignificand = significand;
896 const fltSemantics *savedSemantics = semantics;
897 fltSemantics extendedSemantics;
899 unsigned int extendedPrecision;
901 /* Normalize our MSB. */
902 extendedPrecision = precision + precision - 1;
903 if (omsb != extendedPrecision) {
904 APInt::tcShiftLeft(fullSignificand, newPartsCount,
905 extendedPrecision - omsb);
906 exponent -= extendedPrecision - omsb;
909 /* Create new semantics. */
910 extendedSemantics = *semantics;
911 extendedSemantics.precision = extendedPrecision;
913 if (newPartsCount == 1)
914 significand.part = fullSignificand[0];
916 significand.parts = fullSignificand;
917 semantics = &extendedSemantics;
919 APFloat extendedAddend(*addend);
920 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
921 assert(status == opOK);
923 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
925 /* Restore our state. */
926 if (newPartsCount == 1)
927 fullSignificand[0] = significand.part;
928 significand = savedSignificand;
929 semantics = savedSemantics;
931 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
934 exponent -= (precision - 1);
936 if (omsb > precision) {
937 unsigned int bits, significantParts;
940 bits = omsb - precision;
941 significantParts = partCountForBits(omsb);
942 lf = shiftRight(fullSignificand, significantParts, bits);
943 lost_fraction = combineLostFractions(lf, lost_fraction);
947 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
949 if (newPartsCount > 4)
950 delete [] fullSignificand;
952 return lost_fraction;
955 /* Multiply the significands of LHS and RHS to DST. */
957 APFloat::divideSignificand(const APFloat &rhs)
959 unsigned int bit, i, partsCount;
960 const integerPart *rhsSignificand;
961 integerPart *lhsSignificand, *dividend, *divisor;
962 integerPart scratch[4];
963 lostFraction lost_fraction;
965 assert(semantics == rhs.semantics);
967 lhsSignificand = significandParts();
968 rhsSignificand = rhs.significandParts();
969 partsCount = partCount();
972 dividend = new integerPart[partsCount * 2];
976 divisor = dividend + partsCount;
978 /* Copy the dividend and divisor as they will be modified in-place. */
979 for (i = 0; i < partsCount; i++) {
980 dividend[i] = lhsSignificand[i];
981 divisor[i] = rhsSignificand[i];
982 lhsSignificand[i] = 0;
985 exponent -= rhs.exponent;
987 unsigned int precision = semantics->precision;
989 /* Normalize the divisor. */
990 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
993 APInt::tcShiftLeft(divisor, partsCount, bit);
996 /* Normalize the dividend. */
997 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1000 APInt::tcShiftLeft(dividend, partsCount, bit);
1003 /* Ensure the dividend >= divisor initially for the loop below.
1004 Incidentally, this means that the division loop below is
1005 guaranteed to set the integer bit to one. */
1006 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1008 APInt::tcShiftLeft(dividend, partsCount, 1);
1009 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1012 /* Long division. */
1013 for (bit = precision; bit; bit -= 1) {
1014 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1015 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1016 APInt::tcSetBit(lhsSignificand, bit - 1);
1019 APInt::tcShiftLeft(dividend, partsCount, 1);
1022 /* Figure out the lost fraction. */
1023 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1026 lost_fraction = lfMoreThanHalf;
1028 lost_fraction = lfExactlyHalf;
1029 else if (APInt::tcIsZero(dividend, partsCount))
1030 lost_fraction = lfExactlyZero;
1032 lost_fraction = lfLessThanHalf;
1037 return lost_fraction;
1041 APFloat::significandMSB() const
1043 return APInt::tcMSB(significandParts(), partCount());
1047 APFloat::significandLSB() const
1049 return APInt::tcLSB(significandParts(), partCount());
1052 /* Note that a zero result is NOT normalized to fcZero. */
1054 APFloat::shiftSignificandRight(unsigned int bits)
1056 /* Our exponent should not overflow. */
1057 assert((exponent_t) (exponent + bits) >= exponent);
1061 return shiftRight(significandParts(), partCount(), bits);
1064 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1066 APFloat::shiftSignificandLeft(unsigned int bits)
1068 assert(bits < semantics->precision);
1071 unsigned int partsCount = partCount();
1073 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1076 assert(!APInt::tcIsZero(significandParts(), partsCount));
1081 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1085 assert(semantics == rhs.semantics);
1086 assert(category == fcNormal);
1087 assert(rhs.category == fcNormal);
1089 compare = exponent - rhs.exponent;
1091 /* If exponents are equal, do an unsigned bignum comparison of the
1094 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1098 return cmpGreaterThan;
1099 else if (compare < 0)
1105 /* Handle overflow. Sign is preserved. We either become infinity or
1106 the largest finite number. */
1108 APFloat::handleOverflow(roundingMode rounding_mode)
1111 if (rounding_mode == rmNearestTiesToEven ||
1112 rounding_mode == rmNearestTiesToAway ||
1113 (rounding_mode == rmTowardPositive && !sign) ||
1114 (rounding_mode == rmTowardNegative && sign)) {
1115 category = fcInfinity;
1116 return (opStatus) (opOverflow | opInexact);
1119 /* Otherwise we become the largest finite number. */
1120 category = fcNormal;
1121 exponent = semantics->maxExponent;
1122 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1123 semantics->precision);
1128 /* Returns TRUE if, when truncating the current number, with BIT the
1129 new LSB, with the given lost fraction and rounding mode, the result
1130 would need to be rounded away from zero (i.e., by increasing the
1131 signficand). This routine must work for fcZero of both signs, and
1132 fcNormal numbers. */
1134 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1135 lostFraction lost_fraction,
1136 unsigned int bit) const
1138 /* NaNs and infinities should not have lost fractions. */
1139 assert(category == fcNormal || category == fcZero);
1141 /* Current callers never pass this so we don't handle it. */
1142 assert(lost_fraction != lfExactlyZero);
1144 switch (rounding_mode) {
1145 case rmNearestTiesToAway:
1146 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1148 case rmNearestTiesToEven:
1149 if (lost_fraction == lfMoreThanHalf)
1152 /* Our zeroes don't have a significand to test. */
1153 if (lost_fraction == lfExactlyHalf && category != fcZero)
1154 return APInt::tcExtractBit(significandParts(), bit);
1161 case rmTowardPositive:
1162 return sign == false;
1164 case rmTowardNegative:
1165 return sign == true;
1167 llvm_unreachable("Invalid rounding mode found");
1171 APFloat::normalize(roundingMode rounding_mode,
1172 lostFraction lost_fraction)
1174 unsigned int omsb; /* One, not zero, based MSB. */
1177 if (category != fcNormal)
1180 /* Before rounding normalize the exponent of fcNormal numbers. */
1181 omsb = significandMSB() + 1;
1184 /* OMSB is numbered from 1. We want to place it in the integer
1185 bit numbered PRECISION if possible, with a compensating change in
1187 exponentChange = omsb - semantics->precision;
1189 /* If the resulting exponent is too high, overflow according to
1190 the rounding mode. */
1191 if (exponent + exponentChange > semantics->maxExponent)
1192 return handleOverflow(rounding_mode);
1194 /* Subnormal numbers have exponent minExponent, and their MSB
1195 is forced based on that. */
1196 if (exponent + exponentChange < semantics->minExponent)
1197 exponentChange = semantics->minExponent - exponent;
1199 /* Shifting left is easy as we don't lose precision. */
1200 if (exponentChange < 0) {
1201 assert(lost_fraction == lfExactlyZero);
1203 shiftSignificandLeft(-exponentChange);
1208 if (exponentChange > 0) {
1211 /* Shift right and capture any new lost fraction. */
1212 lf = shiftSignificandRight(exponentChange);
1214 lost_fraction = combineLostFractions(lf, lost_fraction);
1216 /* Keep OMSB up-to-date. */
1217 if (omsb > (unsigned) exponentChange)
1218 omsb -= exponentChange;
1224 /* Now round the number according to rounding_mode given the lost
1227 /* As specified in IEEE 754, since we do not trap we do not report
1228 underflow for exact results. */
1229 if (lost_fraction == lfExactlyZero) {
1230 /* Canonicalize zeroes. */
1237 /* Increment the significand if we're rounding away from zero. */
1238 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1240 exponent = semantics->minExponent;
1242 incrementSignificand();
1243 omsb = significandMSB() + 1;
1245 /* Did the significand increment overflow? */
1246 if (omsb == (unsigned) semantics->precision + 1) {
1247 /* Renormalize by incrementing the exponent and shifting our
1248 significand right one. However if we already have the
1249 maximum exponent we overflow to infinity. */
1250 if (exponent == semantics->maxExponent) {
1251 category = fcInfinity;
1253 return (opStatus) (opOverflow | opInexact);
1256 shiftSignificandRight(1);
1262 /* The normal case - we were and are not denormal, and any
1263 significand increment above didn't overflow. */
1264 if (omsb == semantics->precision)
1267 /* We have a non-zero denormal. */
1268 assert(omsb < semantics->precision);
1270 /* Canonicalize zeroes. */
1274 /* The fcZero case is a denormal that underflowed to zero. */
1275 return (opStatus) (opUnderflow | opInexact);
1279 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1281 switch (convolve(category, rhs.category)) {
1283 llvm_unreachable(0);
1285 case convolve(fcNaN, fcZero):
1286 case convolve(fcNaN, fcNormal):
1287 case convolve(fcNaN, fcInfinity):
1288 case convolve(fcNaN, fcNaN):
1289 case convolve(fcNormal, fcZero):
1290 case convolve(fcInfinity, fcNormal):
1291 case convolve(fcInfinity, fcZero):
1294 case convolve(fcZero, fcNaN):
1295 case convolve(fcNormal, fcNaN):
1296 case convolve(fcInfinity, fcNaN):
1298 copySignificand(rhs);
1301 case convolve(fcNormal, fcInfinity):
1302 case convolve(fcZero, fcInfinity):
1303 category = fcInfinity;
1304 sign = rhs.sign ^ subtract;
1307 case convolve(fcZero, fcNormal):
1309 sign = rhs.sign ^ subtract;
1312 case convolve(fcZero, fcZero):
1313 /* Sign depends on rounding mode; handled by caller. */
1316 case convolve(fcInfinity, fcInfinity):
1317 /* Differently signed infinities can only be validly
1319 if (((sign ^ rhs.sign)!=0) != subtract) {
1326 case convolve(fcNormal, fcNormal):
1331 /* Add or subtract two normal numbers. */
1333 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1336 lostFraction lost_fraction;
1339 /* Determine if the operation on the absolute values is effectively
1340 an addition or subtraction. */
1341 subtract ^= (sign ^ rhs.sign) ? true : false;
1343 /* Are we bigger exponent-wise than the RHS? */
1344 bits = exponent - rhs.exponent;
1346 /* Subtraction is more subtle than one might naively expect. */
1348 APFloat temp_rhs(rhs);
1352 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1353 lost_fraction = lfExactlyZero;
1354 } else if (bits > 0) {
1355 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1356 shiftSignificandLeft(1);
1359 lost_fraction = shiftSignificandRight(-bits - 1);
1360 temp_rhs.shiftSignificandLeft(1);
1365 carry = temp_rhs.subtractSignificand
1366 (*this, lost_fraction != lfExactlyZero);
1367 copySignificand(temp_rhs);
1370 carry = subtractSignificand
1371 (temp_rhs, lost_fraction != lfExactlyZero);
1374 /* Invert the lost fraction - it was on the RHS and
1376 if (lost_fraction == lfLessThanHalf)
1377 lost_fraction = lfMoreThanHalf;
1378 else if (lost_fraction == lfMoreThanHalf)
1379 lost_fraction = lfLessThanHalf;
1381 /* The code above is intended to ensure that no borrow is
1387 APFloat temp_rhs(rhs);
1389 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1390 carry = addSignificand(temp_rhs);
1392 lost_fraction = shiftSignificandRight(-bits);
1393 carry = addSignificand(rhs);
1396 /* We have a guard bit; generating a carry cannot happen. */
1401 return lost_fraction;
1405 APFloat::multiplySpecials(const APFloat &rhs)
1407 switch (convolve(category, rhs.category)) {
1409 llvm_unreachable(0);
1411 case convolve(fcNaN, fcZero):
1412 case convolve(fcNaN, fcNormal):
1413 case convolve(fcNaN, fcInfinity):
1414 case convolve(fcNaN, fcNaN):
1417 case convolve(fcZero, fcNaN):
1418 case convolve(fcNormal, fcNaN):
1419 case convolve(fcInfinity, fcNaN):
1421 copySignificand(rhs);
1424 case convolve(fcNormal, fcInfinity):
1425 case convolve(fcInfinity, fcNormal):
1426 case convolve(fcInfinity, fcInfinity):
1427 category = fcInfinity;
1430 case convolve(fcZero, fcNormal):
1431 case convolve(fcNormal, fcZero):
1432 case convolve(fcZero, fcZero):
1436 case convolve(fcZero, fcInfinity):
1437 case convolve(fcInfinity, fcZero):
1441 case convolve(fcNormal, fcNormal):
1447 APFloat::divideSpecials(const APFloat &rhs)
1449 switch (convolve(category, rhs.category)) {
1451 llvm_unreachable(0);
1453 case convolve(fcNaN, fcZero):
1454 case convolve(fcNaN, fcNormal):
1455 case convolve(fcNaN, fcInfinity):
1456 case convolve(fcNaN, fcNaN):
1457 case convolve(fcInfinity, fcZero):
1458 case convolve(fcInfinity, fcNormal):
1459 case convolve(fcZero, fcInfinity):
1460 case convolve(fcZero, fcNormal):
1463 case convolve(fcZero, fcNaN):
1464 case convolve(fcNormal, fcNaN):
1465 case convolve(fcInfinity, fcNaN):
1467 copySignificand(rhs);
1470 case convolve(fcNormal, fcInfinity):
1474 case convolve(fcNormal, fcZero):
1475 category = fcInfinity;
1478 case convolve(fcInfinity, fcInfinity):
1479 case convolve(fcZero, fcZero):
1483 case convolve(fcNormal, fcNormal):
1489 APFloat::modSpecials(const APFloat &rhs)
1491 switch (convolve(category, rhs.category)) {
1493 llvm_unreachable(0);
1495 case convolve(fcNaN, fcZero):
1496 case convolve(fcNaN, fcNormal):
1497 case convolve(fcNaN, fcInfinity):
1498 case convolve(fcNaN, fcNaN):
1499 case convolve(fcZero, fcInfinity):
1500 case convolve(fcZero, fcNormal):
1501 case convolve(fcNormal, fcInfinity):
1504 case convolve(fcZero, fcNaN):
1505 case convolve(fcNormal, fcNaN):
1506 case convolve(fcInfinity, fcNaN):
1508 copySignificand(rhs);
1511 case convolve(fcNormal, fcZero):
1512 case convolve(fcInfinity, fcZero):
1513 case convolve(fcInfinity, fcNormal):
1514 case convolve(fcInfinity, fcInfinity):
1515 case convolve(fcZero, fcZero):
1519 case convolve(fcNormal, fcNormal):
1526 APFloat::changeSign()
1528 /* Look mummy, this one's easy. */
1533 APFloat::clearSign()
1535 /* So is this one. */
1540 APFloat::copySign(const APFloat &rhs)
1546 /* Normalized addition or subtraction. */
1548 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1553 fs = addOrSubtractSpecials(rhs, subtract);
1555 /* This return code means it was not a simple case. */
1556 if (fs == opDivByZero) {
1557 lostFraction lost_fraction;
1559 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1560 fs = normalize(rounding_mode, lost_fraction);
1562 /* Can only be zero if we lost no fraction. */
1563 assert(category != fcZero || lost_fraction == lfExactlyZero);
1566 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1567 positive zero unless rounding to minus infinity, except that
1568 adding two like-signed zeroes gives that zero. */
1569 if (category == fcZero) {
1570 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1571 sign = (rounding_mode == rmTowardNegative);
1577 /* Normalized addition. */
1579 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1581 return addOrSubtract(rhs, rounding_mode, false);
1584 /* Normalized subtraction. */
1586 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1588 return addOrSubtract(rhs, rounding_mode, true);
1591 /* Normalized multiply. */
1593 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1598 fs = multiplySpecials(rhs);
1600 if (category == fcNormal) {
1601 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1602 fs = normalize(rounding_mode, lost_fraction);
1603 if (lost_fraction != lfExactlyZero)
1604 fs = (opStatus) (fs | opInexact);
1610 /* Normalized divide. */
1612 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1617 fs = divideSpecials(rhs);
1619 if (category == fcNormal) {
1620 lostFraction lost_fraction = divideSignificand(rhs);
1621 fs = normalize(rounding_mode, lost_fraction);
1622 if (lost_fraction != lfExactlyZero)
1623 fs = (opStatus) (fs | opInexact);
1629 /* Normalized remainder. This is not currently correct in all cases. */
1631 APFloat::remainder(const APFloat &rhs)
1635 unsigned int origSign = sign;
1637 fs = V.divide(rhs, rmNearestTiesToEven);
1638 if (fs == opDivByZero)
1641 int parts = partCount();
1642 integerPart *x = new integerPart[parts];
1644 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1645 rmNearestTiesToEven, &ignored);
1646 if (fs==opInvalidOp)
1649 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1650 rmNearestTiesToEven);
1651 assert(fs==opOK); // should always work
1653 fs = V.multiply(rhs, rmNearestTiesToEven);
1654 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1656 fs = subtract(V, rmNearestTiesToEven);
1657 assert(fs==opOK || fs==opInexact); // likewise
1660 sign = origSign; // IEEE754 requires this
1665 /* Normalized llvm frem (C fmod).
1666 This is not currently correct in all cases. */
1668 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1671 fs = modSpecials(rhs);
1673 if (category == fcNormal && rhs.category == fcNormal) {
1675 unsigned int origSign = sign;
1677 fs = V.divide(rhs, rmNearestTiesToEven);
1678 if (fs == opDivByZero)
1681 int parts = partCount();
1682 integerPart *x = new integerPart[parts];
1684 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1685 rmTowardZero, &ignored);
1686 if (fs==opInvalidOp)
1689 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1690 rmNearestTiesToEven);
1691 assert(fs==opOK); // should always work
1693 fs = V.multiply(rhs, rounding_mode);
1694 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1696 fs = subtract(V, rounding_mode);
1697 assert(fs==opOK || fs==opInexact); // likewise
1700 sign = origSign; // IEEE754 requires this
1706 /* Normalized fused-multiply-add. */
1708 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1709 const APFloat &addend,
1710 roundingMode rounding_mode)
1714 /* Post-multiplication sign, before addition. */
1715 sign ^= multiplicand.sign;
1717 /* If and only if all arguments are normal do we need to do an
1718 extended-precision calculation. */
1719 if (category == fcNormal &&
1720 multiplicand.category == fcNormal &&
1721 addend.category == fcNormal) {
1722 lostFraction lost_fraction;
1724 lost_fraction = multiplySignificand(multiplicand, &addend);
1725 fs = normalize(rounding_mode, lost_fraction);
1726 if (lost_fraction != lfExactlyZero)
1727 fs = (opStatus) (fs | opInexact);
1729 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1730 positive zero unless rounding to minus infinity, except that
1731 adding two like-signed zeroes gives that zero. */
1732 if (category == fcZero && sign != addend.sign)
1733 sign = (rounding_mode == rmTowardNegative);
1735 fs = multiplySpecials(multiplicand);
1737 /* FS can only be opOK or opInvalidOp. There is no more work
1738 to do in the latter case. The IEEE-754R standard says it is
1739 implementation-defined in this case whether, if ADDEND is a
1740 quiet NaN, we raise invalid op; this implementation does so.
1742 If we need to do the addition we can do so with normal
1745 fs = addOrSubtract(addend, rounding_mode, false);
1751 /* Rounding-mode corrrect round to integral value. */
1752 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1755 // If the exponent is large enough, we know that this value is already
1756 // integral, and the arithmetic below would potentially cause it to saturate
1757 // to +/-Inf. Bail out early instead.
1758 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1761 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1762 // precision of our format, and then subtract it back off again. The choice
1763 // of rounding modes for the addition/subtraction determines the rounding mode
1764 // for our integral rounding as well.
1765 // NOTE: When the input value is negative, we do subtraction followed by
1766 // addition instead.
1767 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1768 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1769 APFloat MagicConstant(*semantics);
1770 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1771 rmNearestTiesToEven);
1772 MagicConstant.copySign(*this);
1777 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1778 bool inputSign = isNegative();
1780 fs = add(MagicConstant, rounding_mode);
1781 if (fs != opOK && fs != opInexact)
1784 fs = subtract(MagicConstant, rounding_mode);
1786 // Restore the input sign.
1787 if (inputSign != isNegative())
1794 /* Comparison requires normalized numbers. */
1796 APFloat::compare(const APFloat &rhs) const
1800 assert(semantics == rhs.semantics);
1802 switch (convolve(category, rhs.category)) {
1804 llvm_unreachable(0);
1806 case convolve(fcNaN, fcZero):
1807 case convolve(fcNaN, fcNormal):
1808 case convolve(fcNaN, fcInfinity):
1809 case convolve(fcNaN, fcNaN):
1810 case convolve(fcZero, fcNaN):
1811 case convolve(fcNormal, fcNaN):
1812 case convolve(fcInfinity, fcNaN):
1813 return cmpUnordered;
1815 case convolve(fcInfinity, fcNormal):
1816 case convolve(fcInfinity, fcZero):
1817 case convolve(fcNormal, fcZero):
1821 return cmpGreaterThan;
1823 case convolve(fcNormal, fcInfinity):
1824 case convolve(fcZero, fcInfinity):
1825 case convolve(fcZero, fcNormal):
1827 return cmpGreaterThan;
1831 case convolve(fcInfinity, fcInfinity):
1832 if (sign == rhs.sign)
1837 return cmpGreaterThan;
1839 case convolve(fcZero, fcZero):
1842 case convolve(fcNormal, fcNormal):
1846 /* Two normal numbers. Do they have the same sign? */
1847 if (sign != rhs.sign) {
1849 result = cmpLessThan;
1851 result = cmpGreaterThan;
1853 /* Compare absolute values; invert result if negative. */
1854 result = compareAbsoluteValue(rhs);
1857 if (result == cmpLessThan)
1858 result = cmpGreaterThan;
1859 else if (result == cmpGreaterThan)
1860 result = cmpLessThan;
1867 /// APFloat::convert - convert a value of one floating point type to another.
1868 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1869 /// records whether the transformation lost information, i.e. whether
1870 /// converting the result back to the original type will produce the
1871 /// original value (this is almost the same as return value==fsOK, but there
1872 /// are edge cases where this is not so).
1875 APFloat::convert(const fltSemantics &toSemantics,
1876 roundingMode rounding_mode, bool *losesInfo)
1878 lostFraction lostFraction;
1879 unsigned int newPartCount, oldPartCount;
1882 const fltSemantics &fromSemantics = *semantics;
1884 lostFraction = lfExactlyZero;
1885 newPartCount = partCountForBits(toSemantics.precision + 1);
1886 oldPartCount = partCount();
1887 shift = toSemantics.precision - fromSemantics.precision;
1889 bool X86SpecialNan = false;
1890 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1891 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1892 (!(*significandParts() & 0x8000000000000000ULL) ||
1893 !(*significandParts() & 0x4000000000000000ULL))) {
1894 // x86 has some unusual NaNs which cannot be represented in any other
1895 // format; note them here.
1896 X86SpecialNan = true;
1899 // If this is a truncation, perform the shift before we narrow the storage.
1900 if (shift < 0 && (category==fcNormal || category==fcNaN))
1901 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1903 // Fix the storage so it can hold to new value.
1904 if (newPartCount > oldPartCount) {
1905 // The new type requires more storage; make it available.
1906 integerPart *newParts;
1907 newParts = new integerPart[newPartCount];
1908 APInt::tcSet(newParts, 0, newPartCount);
1909 if (category==fcNormal || category==fcNaN)
1910 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1912 significand.parts = newParts;
1913 } else if (newPartCount == 1 && oldPartCount != 1) {
1914 // Switch to built-in storage for a single part.
1915 integerPart newPart = 0;
1916 if (category==fcNormal || category==fcNaN)
1917 newPart = significandParts()[0];
1919 significand.part = newPart;
1922 // Now that we have the right storage, switch the semantics.
1923 semantics = &toSemantics;
1925 // If this is an extension, perform the shift now that the storage is
1927 if (shift > 0 && (category==fcNormal || category==fcNaN))
1928 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1930 if (category == fcNormal) {
1931 fs = normalize(rounding_mode, lostFraction);
1932 *losesInfo = (fs != opOK);
1933 } else if (category == fcNaN) {
1934 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1935 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1936 // does not give you back the same bits. This is dubious, and we
1937 // don't currently do it. You're really supposed to get
1938 // an invalid operation signal at runtime, but nobody does that.
1948 /* Convert a floating point number to an integer according to the
1949 rounding mode. If the rounded integer value is out of range this
1950 returns an invalid operation exception and the contents of the
1951 destination parts are unspecified. If the rounded value is in
1952 range but the floating point number is not the exact integer, the C
1953 standard doesn't require an inexact exception to be raised. IEEE
1954 854 does require it so we do that.
1956 Note that for conversions to integer type the C standard requires
1957 round-to-zero to always be used. */
1959 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1961 roundingMode rounding_mode,
1962 bool *isExact) const
1964 lostFraction lost_fraction;
1965 const integerPart *src;
1966 unsigned int dstPartsCount, truncatedBits;
1970 /* Handle the three special cases first. */
1971 if (category == fcInfinity || category == fcNaN)
1974 dstPartsCount = partCountForBits(width);
1976 if (category == fcZero) {
1977 APInt::tcSet(parts, 0, dstPartsCount);
1978 // Negative zero can't be represented as an int.
1983 src = significandParts();
1985 /* Step 1: place our absolute value, with any fraction truncated, in
1988 /* Our absolute value is less than one; truncate everything. */
1989 APInt::tcSet(parts, 0, dstPartsCount);
1990 /* For exponent -1 the integer bit represents .5, look at that.
1991 For smaller exponents leftmost truncated bit is 0. */
1992 truncatedBits = semantics->precision -1U - exponent;
1994 /* We want the most significant (exponent + 1) bits; the rest are
1996 unsigned int bits = exponent + 1U;
1998 /* Hopelessly large in magnitude? */
2002 if (bits < semantics->precision) {
2003 /* We truncate (semantics->precision - bits) bits. */
2004 truncatedBits = semantics->precision - bits;
2005 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2007 /* We want at least as many bits as are available. */
2008 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2009 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2014 /* Step 2: work out any lost fraction, and increment the absolute
2015 value if we would round away from zero. */
2016 if (truncatedBits) {
2017 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2019 if (lost_fraction != lfExactlyZero &&
2020 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2021 if (APInt::tcIncrement(parts, dstPartsCount))
2022 return opInvalidOp; /* Overflow. */
2025 lost_fraction = lfExactlyZero;
2028 /* Step 3: check if we fit in the destination. */
2029 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2033 /* Negative numbers cannot be represented as unsigned. */
2037 /* It takes omsb bits to represent the unsigned integer value.
2038 We lose a bit for the sign, but care is needed as the
2039 maximally negative integer is a special case. */
2040 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2043 /* This case can happen because of rounding. */
2048 APInt::tcNegate (parts, dstPartsCount);
2050 if (omsb >= width + !isSigned)
2054 if (lost_fraction == lfExactlyZero) {
2061 /* Same as convertToSignExtendedInteger, except we provide
2062 deterministic values in case of an invalid operation exception,
2063 namely zero for NaNs and the minimal or maximal value respectively
2064 for underflow or overflow.
2065 The *isExact output tells whether the result is exact, in the sense
2066 that converting it back to the original floating point type produces
2067 the original value. This is almost equivalent to result==opOK,
2068 except for negative zeroes.
2071 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2073 roundingMode rounding_mode, bool *isExact) const
2077 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2080 if (fs == opInvalidOp) {
2081 unsigned int bits, dstPartsCount;
2083 dstPartsCount = partCountForBits(width);
2085 if (category == fcNaN)
2090 bits = width - isSigned;
2092 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2093 if (sign && isSigned)
2094 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2100 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2101 an APSInt, whose initial bit-width and signed-ness are used to determine the
2102 precision of the conversion.
2105 APFloat::convertToInteger(APSInt &result,
2106 roundingMode rounding_mode, bool *isExact) const
2108 unsigned bitWidth = result.getBitWidth();
2109 SmallVector<uint64_t, 4> parts(result.getNumWords());
2110 opStatus status = convertToInteger(
2111 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2112 // Keeps the original signed-ness.
2113 result = APInt(bitWidth, parts);
2117 /* Convert an unsigned integer SRC to a floating point number,
2118 rounding according to ROUNDING_MODE. The sign of the floating
2119 point number is not modified. */
2121 APFloat::convertFromUnsignedParts(const integerPart *src,
2122 unsigned int srcCount,
2123 roundingMode rounding_mode)
2125 unsigned int omsb, precision, dstCount;
2127 lostFraction lost_fraction;
2129 category = fcNormal;
2130 omsb = APInt::tcMSB(src, srcCount) + 1;
2131 dst = significandParts();
2132 dstCount = partCount();
2133 precision = semantics->precision;
2135 /* We want the most significant PRECISION bits of SRC. There may not
2136 be that many; extract what we can. */
2137 if (precision <= omsb) {
2138 exponent = omsb - 1;
2139 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2141 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2143 exponent = precision - 1;
2144 lost_fraction = lfExactlyZero;
2145 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2148 return normalize(rounding_mode, lost_fraction);
2152 APFloat::convertFromAPInt(const APInt &Val,
2154 roundingMode rounding_mode)
2156 unsigned int partCount = Val.getNumWords();
2160 if (isSigned && api.isNegative()) {
2165 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2168 /* Convert a two's complement integer SRC to a floating point number,
2169 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2170 integer is signed, in which case it must be sign-extended. */
2172 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2173 unsigned int srcCount,
2175 roundingMode rounding_mode)
2180 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2183 /* If we're signed and negative negate a copy. */
2185 copy = new integerPart[srcCount];
2186 APInt::tcAssign(copy, src, srcCount);
2187 APInt::tcNegate(copy, srcCount);
2188 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2192 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2198 /* FIXME: should this just take a const APInt reference? */
2200 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2201 unsigned int width, bool isSigned,
2202 roundingMode rounding_mode)
2204 unsigned int partCount = partCountForBits(width);
2205 APInt api = APInt(width, makeArrayRef(parts, partCount));
2208 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2213 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2217 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2219 lostFraction lost_fraction = lfExactlyZero;
2220 integerPart *significand;
2221 unsigned int bitPos, partsCount;
2222 StringRef::iterator dot, firstSignificantDigit;
2226 category = fcNormal;
2228 significand = significandParts();
2229 partsCount = partCount();
2230 bitPos = partsCount * integerPartWidth;
2232 /* Skip leading zeroes and any (hexa)decimal point. */
2233 StringRef::iterator begin = s.begin();
2234 StringRef::iterator end = s.end();
2235 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2236 firstSignificantDigit = p;
2239 integerPart hex_value;
2242 assert(dot == end && "String contains multiple dots");
2249 hex_value = hexDigitValue(*p);
2250 if (hex_value == -1U) {
2259 /* Store the number whilst 4-bit nibbles remain. */
2262 hex_value <<= bitPos % integerPartWidth;
2263 significand[bitPos / integerPartWidth] |= hex_value;
2265 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2266 while (p != end && hexDigitValue(*p) != -1U)
2273 /* Hex floats require an exponent but not a hexadecimal point. */
2274 assert(p != end && "Hex strings require an exponent");
2275 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2276 assert(p != begin && "Significand has no digits");
2277 assert((dot == end || p - begin != 1) && "Significand has no digits");
2279 /* Ignore the exponent if we are zero. */
2280 if (p != firstSignificantDigit) {
2283 /* Implicit hexadecimal point? */
2287 /* Calculate the exponent adjustment implicit in the number of
2288 significant digits. */
2289 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2290 if (expAdjustment < 0)
2292 expAdjustment = expAdjustment * 4 - 1;
2294 /* Adjust for writing the significand starting at the most
2295 significant nibble. */
2296 expAdjustment += semantics->precision;
2297 expAdjustment -= partsCount * integerPartWidth;
2299 /* Adjust for the given exponent. */
2300 exponent = totalExponent(p + 1, end, expAdjustment);
2303 return normalize(rounding_mode, lost_fraction);
2307 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2308 unsigned sigPartCount, int exp,
2309 roundingMode rounding_mode)
2311 unsigned int parts, pow5PartCount;
2312 fltSemantics calcSemantics = { 32767, -32767, 0 };
2313 integerPart pow5Parts[maxPowerOfFiveParts];
2316 isNearest = (rounding_mode == rmNearestTiesToEven ||
2317 rounding_mode == rmNearestTiesToAway);
2319 parts = partCountForBits(semantics->precision + 11);
2321 /* Calculate pow(5, abs(exp)). */
2322 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2324 for (;; parts *= 2) {
2325 opStatus sigStatus, powStatus;
2326 unsigned int excessPrecision, truncatedBits;
2328 calcSemantics.precision = parts * integerPartWidth - 1;
2329 excessPrecision = calcSemantics.precision - semantics->precision;
2330 truncatedBits = excessPrecision;
2332 APFloat decSig(calcSemantics, fcZero, sign);
2333 APFloat pow5(calcSemantics, fcZero, false);
2335 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2336 rmNearestTiesToEven);
2337 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2338 rmNearestTiesToEven);
2339 /* Add exp, as 10^n = 5^n * 2^n. */
2340 decSig.exponent += exp;
2342 lostFraction calcLostFraction;
2343 integerPart HUerr, HUdistance;
2344 unsigned int powHUerr;
2347 /* multiplySignificand leaves the precision-th bit set to 1. */
2348 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2349 powHUerr = powStatus != opOK;
2351 calcLostFraction = decSig.divideSignificand(pow5);
2352 /* Denormal numbers have less precision. */
2353 if (decSig.exponent < semantics->minExponent) {
2354 excessPrecision += (semantics->minExponent - decSig.exponent);
2355 truncatedBits = excessPrecision;
2356 if (excessPrecision > calcSemantics.precision)
2357 excessPrecision = calcSemantics.precision;
2359 /* Extra half-ulp lost in reciprocal of exponent. */
2360 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2363 /* Both multiplySignificand and divideSignificand return the
2364 result with the integer bit set. */
2365 assert(APInt::tcExtractBit
2366 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2368 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2370 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2371 excessPrecision, isNearest);
2373 /* Are we guaranteed to round correctly if we truncate? */
2374 if (HUdistance >= HUerr) {
2375 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2376 calcSemantics.precision - excessPrecision,
2378 /* Take the exponent of decSig. If we tcExtract-ed less bits
2379 above we must adjust our exponent to compensate for the
2380 implicit right shift. */
2381 exponent = (decSig.exponent + semantics->precision
2382 - (calcSemantics.precision - excessPrecision));
2383 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2386 return normalize(rounding_mode, calcLostFraction);
2392 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2397 /* Scan the text. */
2398 StringRef::iterator p = str.begin();
2399 interpretDecimal(p, str.end(), &D);
2401 /* Handle the quick cases. First the case of no significant digits,
2402 i.e. zero, and then exponents that are obviously too large or too
2403 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2404 definitely overflows if
2406 (exp - 1) * L >= maxExponent
2408 and definitely underflows to zero where
2410 (exp + 1) * L <= minExponent - precision
2412 With integer arithmetic the tightest bounds for L are
2414 93/28 < L < 196/59 [ numerator <= 256 ]
2415 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2418 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2422 /* Check whether the normalized exponent is high enough to overflow
2423 max during the log-rebasing in the max-exponent check below. */
2424 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2425 fs = handleOverflow(rounding_mode);
2427 /* If it wasn't, then it also wasn't high enough to overflow max
2428 during the log-rebasing in the min-exponent check. Check that it
2429 won't overflow min in either check, then perform the min-exponent
2431 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2432 (D.normalizedExponent + 1) * 28738 <=
2433 8651 * (semantics->minExponent - (int) semantics->precision)) {
2434 /* Underflow to zero and round. */
2436 fs = normalize(rounding_mode, lfLessThanHalf);
2438 /* We can finally safely perform the max-exponent check. */
2439 } else if ((D.normalizedExponent - 1) * 42039
2440 >= 12655 * semantics->maxExponent) {
2441 /* Overflow and round. */
2442 fs = handleOverflow(rounding_mode);
2444 integerPart *decSignificand;
2445 unsigned int partCount;
2447 /* A tight upper bound on number of bits required to hold an
2448 N-digit decimal integer is N * 196 / 59. Allocate enough space
2449 to hold the full significand, and an extra part required by
2451 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2452 partCount = partCountForBits(1 + 196 * partCount / 59);
2453 decSignificand = new integerPart[partCount + 1];
2456 /* Convert to binary efficiently - we do almost all multiplication
2457 in an integerPart. When this would overflow do we do a single
2458 bignum multiplication, and then revert again to multiplication
2459 in an integerPart. */
2461 integerPart decValue, val, multiplier;
2469 if (p == str.end()) {
2473 decValue = decDigitValue(*p++);
2474 assert(decValue < 10U && "Invalid character in significand");
2476 val = val * 10 + decValue;
2477 /* The maximum number that can be multiplied by ten with any
2478 digit added without overflowing an integerPart. */
2479 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2481 /* Multiply out the current part. */
2482 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2483 partCount, partCount + 1, false);
2485 /* If we used another part (likely but not guaranteed), increase
2487 if (decSignificand[partCount])
2489 } while (p <= D.lastSigDigit);
2491 category = fcNormal;
2492 fs = roundSignificandWithExponent(decSignificand, partCount,
2493 D.exponent, rounding_mode);
2495 delete [] decSignificand;
2502 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2504 assert(!str.empty() && "Invalid string length");
2506 /* Handle a leading minus sign. */
2507 StringRef::iterator p = str.begin();
2508 size_t slen = str.size();
2509 sign = *p == '-' ? 1 : 0;
2510 if (*p == '-' || *p == '+') {
2513 assert(slen && "String has no digits");
2516 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2517 assert(slen - 2 && "Invalid string");
2518 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2522 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2525 /* Write out a hexadecimal representation of the floating point value
2526 to DST, which must be of sufficient size, in the C99 form
2527 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2528 excluding the terminating NUL.
2530 If UPPERCASE, the output is in upper case, otherwise in lower case.
2532 HEXDIGITS digits appear altogether, rounding the value if
2533 necessary. If HEXDIGITS is 0, the minimal precision to display the
2534 number precisely is used instead. If nothing would appear after
2535 the decimal point it is suppressed.
2537 The decimal exponent is always printed and has at least one digit.
2538 Zero values display an exponent of zero. Infinities and NaNs
2539 appear as "infinity" or "nan" respectively.
2541 The above rules are as specified by C99. There is ambiguity about
2542 what the leading hexadecimal digit should be. This implementation
2543 uses whatever is necessary so that the exponent is displayed as
2544 stored. This implies the exponent will fall within the IEEE format
2545 range, and the leading hexadecimal digit will be 0 (for denormals),
2546 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2547 any other digits zero).
2550 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2551 bool upperCase, roundingMode rounding_mode) const
2561 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2562 dst += sizeof infinityL - 1;
2566 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2567 dst += sizeof NaNU - 1;
2572 *dst++ = upperCase ? 'X': 'x';
2574 if (hexDigits > 1) {
2576 memset (dst, '0', hexDigits - 1);
2577 dst += hexDigits - 1;
2579 *dst++ = upperCase ? 'P': 'p';
2584 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2590 return static_cast<unsigned int>(dst - p);
2593 /* Does the hard work of outputting the correctly rounded hexadecimal
2594 form of a normal floating point number with the specified number of
2595 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2596 digits necessary to print the value precisely is output. */
2598 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2600 roundingMode rounding_mode) const
2602 unsigned int count, valueBits, shift, partsCount, outputDigits;
2603 const char *hexDigitChars;
2604 const integerPart *significand;
2609 *dst++ = upperCase ? 'X': 'x';
2612 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2614 significand = significandParts();
2615 partsCount = partCount();
2617 /* +3 because the first digit only uses the single integer bit, so
2618 we have 3 virtual zero most-significant-bits. */
2619 valueBits = semantics->precision + 3;
2620 shift = integerPartWidth - valueBits % integerPartWidth;
2622 /* The natural number of digits required ignoring trailing
2623 insignificant zeroes. */
2624 outputDigits = (valueBits - significandLSB () + 3) / 4;
2626 /* hexDigits of zero means use the required number for the
2627 precision. Otherwise, see if we are truncating. If we are,
2628 find out if we need to round away from zero. */
2630 if (hexDigits < outputDigits) {
2631 /* We are dropping non-zero bits, so need to check how to round.
2632 "bits" is the number of dropped bits. */
2634 lostFraction fraction;
2636 bits = valueBits - hexDigits * 4;
2637 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2638 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2640 outputDigits = hexDigits;
2643 /* Write the digits consecutively, and start writing in the location
2644 of the hexadecimal point. We move the most significant digit
2645 left and add the hexadecimal point later. */
2648 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2650 while (outputDigits && count) {
2653 /* Put the most significant integerPartWidth bits in "part". */
2654 if (--count == partsCount)
2655 part = 0; /* An imaginary higher zero part. */
2657 part = significand[count] << shift;
2660 part |= significand[count - 1] >> (integerPartWidth - shift);
2662 /* Convert as much of "part" to hexdigits as we can. */
2663 unsigned int curDigits = integerPartWidth / 4;
2665 if (curDigits > outputDigits)
2666 curDigits = outputDigits;
2667 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2668 outputDigits -= curDigits;
2674 /* Note that hexDigitChars has a trailing '0'. */
2677 *q = hexDigitChars[hexDigitValue (*q) + 1];
2678 } while (*q == '0');
2681 /* Add trailing zeroes. */
2682 memset (dst, '0', outputDigits);
2683 dst += outputDigits;
2686 /* Move the most significant digit to before the point, and if there
2687 is something after the decimal point add it. This must come
2688 after rounding above. */
2695 /* Finally output the exponent. */
2696 *dst++ = upperCase ? 'P': 'p';
2698 return writeSignedDecimal (dst, exponent);
2701 hash_code llvm::hash_value(const APFloat &Arg) {
2702 if (Arg.category != APFloat::fcNormal)
2703 return hash_combine((uint8_t)Arg.category,
2704 // NaN has no sign, fix it at zero.
2705 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2706 Arg.semantics->precision);
2708 // Normal floats need their exponent and significand hashed.
2709 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2710 Arg.semantics->precision, Arg.exponent,
2712 Arg.significandParts(),
2713 Arg.significandParts() + Arg.partCount()));
2716 // Conversion from APFloat to/from host float/double. It may eventually be
2717 // possible to eliminate these and have everybody deal with APFloats, but that
2718 // will take a while. This approach will not easily extend to long double.
2719 // Current implementation requires integerPartWidth==64, which is correct at
2720 // the moment but could be made more general.
2722 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2723 // the actual IEEE respresentations. We compensate for that here.
2726 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2728 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2729 assert(partCount()==2);
2731 uint64_t myexponent, mysignificand;
2733 if (category==fcNormal) {
2734 myexponent = exponent+16383; //bias
2735 mysignificand = significandParts()[0];
2736 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2737 myexponent = 0; // denormal
2738 } else if (category==fcZero) {
2741 } else if (category==fcInfinity) {
2742 myexponent = 0x7fff;
2743 mysignificand = 0x8000000000000000ULL;
2745 assert(category == fcNaN && "Unknown category");
2746 myexponent = 0x7fff;
2747 mysignificand = significandParts()[0];
2751 words[0] = mysignificand;
2752 words[1] = ((uint64_t)(sign & 1) << 15) |
2753 (myexponent & 0x7fffLL);
2754 return APInt(80, words);
2758 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2760 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2761 assert(partCount()==2);
2767 // Convert number to double. To avoid spurious underflows, we re-
2768 // normalize against the "double" minExponent first, and only *then*
2769 // truncate the mantissa. The result of that second conversion
2770 // may be inexact, but should never underflow.
2771 // Declare fltSemantics before APFloat that uses it (and
2772 // saves pointer to it) to ensure correct destruction order.
2773 fltSemantics extendedSemantics = *semantics;
2774 extendedSemantics.minExponent = IEEEdouble.minExponent;
2775 APFloat extended(*this);
2776 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2777 assert(fs == opOK && !losesInfo);
2780 APFloat u(extended);
2781 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2782 assert(fs == opOK || fs == opInexact);
2784 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2786 // If conversion was exact or resulted in a special case, we're done;
2787 // just set the second double to zero. Otherwise, re-convert back to
2788 // the extended format and compute the difference. This now should
2789 // convert exactly to double.
2790 if (u.category == fcNormal && losesInfo) {
2791 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2792 assert(fs == opOK && !losesInfo);
2795 APFloat v(extended);
2796 v.subtract(u, rmNearestTiesToEven);
2797 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2798 assert(fs == opOK && !losesInfo);
2800 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2805 return APInt(128, words);
2809 APFloat::convertQuadrupleAPFloatToAPInt() const
2811 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2812 assert(partCount()==2);
2814 uint64_t myexponent, mysignificand, mysignificand2;
2816 if (category==fcNormal) {
2817 myexponent = exponent+16383; //bias
2818 mysignificand = significandParts()[0];
2819 mysignificand2 = significandParts()[1];
2820 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2821 myexponent = 0; // denormal
2822 } else if (category==fcZero) {
2824 mysignificand = mysignificand2 = 0;
2825 } else if (category==fcInfinity) {
2826 myexponent = 0x7fff;
2827 mysignificand = mysignificand2 = 0;
2829 assert(category == fcNaN && "Unknown category!");
2830 myexponent = 0x7fff;
2831 mysignificand = significandParts()[0];
2832 mysignificand2 = significandParts()[1];
2836 words[0] = mysignificand;
2837 words[1] = ((uint64_t)(sign & 1) << 63) |
2838 ((myexponent & 0x7fff) << 48) |
2839 (mysignificand2 & 0xffffffffffffLL);
2841 return APInt(128, words);
2845 APFloat::convertDoubleAPFloatToAPInt() const
2847 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2848 assert(partCount()==1);
2850 uint64_t myexponent, mysignificand;
2852 if (category==fcNormal) {
2853 myexponent = exponent+1023; //bias
2854 mysignificand = *significandParts();
2855 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2856 myexponent = 0; // denormal
2857 } else if (category==fcZero) {
2860 } else if (category==fcInfinity) {
2864 assert(category == fcNaN && "Unknown category!");
2866 mysignificand = *significandParts();
2869 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2870 ((myexponent & 0x7ff) << 52) |
2871 (mysignificand & 0xfffffffffffffLL))));
2875 APFloat::convertFloatAPFloatToAPInt() const
2877 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2878 assert(partCount()==1);
2880 uint32_t myexponent, mysignificand;
2882 if (category==fcNormal) {
2883 myexponent = exponent+127; //bias
2884 mysignificand = (uint32_t)*significandParts();
2885 if (myexponent == 1 && !(mysignificand & 0x800000))
2886 myexponent = 0; // denormal
2887 } else if (category==fcZero) {
2890 } else if (category==fcInfinity) {
2894 assert(category == fcNaN && "Unknown category!");
2896 mysignificand = (uint32_t)*significandParts();
2899 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2900 (mysignificand & 0x7fffff)));
2904 APFloat::convertHalfAPFloatToAPInt() const
2906 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2907 assert(partCount()==1);
2909 uint32_t myexponent, mysignificand;
2911 if (category==fcNormal) {
2912 myexponent = exponent+15; //bias
2913 mysignificand = (uint32_t)*significandParts();
2914 if (myexponent == 1 && !(mysignificand & 0x400))
2915 myexponent = 0; // denormal
2916 } else if (category==fcZero) {
2919 } else if (category==fcInfinity) {
2923 assert(category == fcNaN && "Unknown category!");
2925 mysignificand = (uint32_t)*significandParts();
2928 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2929 (mysignificand & 0x3ff)));
2932 // This function creates an APInt that is just a bit map of the floating
2933 // point constant as it would appear in memory. It is not a conversion,
2934 // and treating the result as a normal integer is unlikely to be useful.
2937 APFloat::bitcastToAPInt() const
2939 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2940 return convertHalfAPFloatToAPInt();
2942 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2943 return convertFloatAPFloatToAPInt();
2945 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2946 return convertDoubleAPFloatToAPInt();
2948 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2949 return convertQuadrupleAPFloatToAPInt();
2951 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2952 return convertPPCDoubleDoubleAPFloatToAPInt();
2954 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2956 return convertF80LongDoubleAPFloatToAPInt();
2960 APFloat::convertToFloat() const
2962 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2963 "Float semantics are not IEEEsingle");
2964 APInt api = bitcastToAPInt();
2965 return api.bitsToFloat();
2969 APFloat::convertToDouble() const
2971 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2972 "Float semantics are not IEEEdouble");
2973 APInt api = bitcastToAPInt();
2974 return api.bitsToDouble();
2977 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2978 /// does not support these bit patterns:
2979 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2980 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2981 /// exponent = 0, integer bit 1 ("pseudodenormal")
2982 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2983 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2985 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2987 assert(api.getBitWidth()==80);
2988 uint64_t i1 = api.getRawData()[0];
2989 uint64_t i2 = api.getRawData()[1];
2990 uint64_t myexponent = (i2 & 0x7fff);
2991 uint64_t mysignificand = i1;
2993 initialize(&APFloat::x87DoubleExtended);
2994 assert(partCount()==2);
2996 sign = static_cast<unsigned int>(i2>>15);
2997 if (myexponent==0 && mysignificand==0) {
2998 // exponent, significand meaningless
3000 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3001 // exponent, significand meaningless
3002 category = fcInfinity;
3003 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3004 // exponent meaningless
3006 significandParts()[0] = mysignificand;
3007 significandParts()[1] = 0;
3009 category = fcNormal;
3010 exponent = myexponent - 16383;
3011 significandParts()[0] = mysignificand;
3012 significandParts()[1] = 0;
3013 if (myexponent==0) // denormal
3019 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3021 assert(api.getBitWidth()==128);
3022 uint64_t i1 = api.getRawData()[0];
3023 uint64_t i2 = api.getRawData()[1];
3027 // Get the first double and convert to our format.
3028 initFromDoubleAPInt(APInt(64, i1));
3029 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3030 assert(fs == opOK && !losesInfo);
3033 // Unless we have a special case, add in second double.
3034 if (category == fcNormal) {
3035 APFloat v(APInt(64, i2));
3036 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3037 assert(fs == opOK && !losesInfo);
3040 add(v, rmNearestTiesToEven);
3045 APFloat::initFromQuadrupleAPInt(const APInt &api)
3047 assert(api.getBitWidth()==128);
3048 uint64_t i1 = api.getRawData()[0];
3049 uint64_t i2 = api.getRawData()[1];
3050 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3051 uint64_t mysignificand = i1;
3052 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3054 initialize(&APFloat::IEEEquad);
3055 assert(partCount()==2);
3057 sign = static_cast<unsigned int>(i2>>63);
3058 if (myexponent==0 &&
3059 (mysignificand==0 && mysignificand2==0)) {
3060 // exponent, significand meaningless
3062 } else if (myexponent==0x7fff &&
3063 (mysignificand==0 && mysignificand2==0)) {
3064 // exponent, significand meaningless
3065 category = fcInfinity;
3066 } else if (myexponent==0x7fff &&
3067 (mysignificand!=0 || mysignificand2 !=0)) {
3068 // exponent meaningless
3070 significandParts()[0] = mysignificand;
3071 significandParts()[1] = mysignificand2;
3073 category = fcNormal;
3074 exponent = myexponent - 16383;
3075 significandParts()[0] = mysignificand;
3076 significandParts()[1] = mysignificand2;
3077 if (myexponent==0) // denormal
3080 significandParts()[1] |= 0x1000000000000LL; // integer bit
3085 APFloat::initFromDoubleAPInt(const APInt &api)
3087 assert(api.getBitWidth()==64);
3088 uint64_t i = *api.getRawData();
3089 uint64_t myexponent = (i >> 52) & 0x7ff;
3090 uint64_t mysignificand = i & 0xfffffffffffffLL;
3092 initialize(&APFloat::IEEEdouble);
3093 assert(partCount()==1);
3095 sign = static_cast<unsigned int>(i>>63);
3096 if (myexponent==0 && mysignificand==0) {
3097 // exponent, significand meaningless
3099 } else if (myexponent==0x7ff && mysignificand==0) {
3100 // exponent, significand meaningless
3101 category = fcInfinity;
3102 } else if (myexponent==0x7ff && mysignificand!=0) {
3103 // exponent meaningless
3105 *significandParts() = mysignificand;
3107 category = fcNormal;
3108 exponent = myexponent - 1023;
3109 *significandParts() = mysignificand;
3110 if (myexponent==0) // denormal
3113 *significandParts() |= 0x10000000000000LL; // integer bit
3118 APFloat::initFromFloatAPInt(const APInt & api)
3120 assert(api.getBitWidth()==32);
3121 uint32_t i = (uint32_t)*api.getRawData();
3122 uint32_t myexponent = (i >> 23) & 0xff;
3123 uint32_t mysignificand = i & 0x7fffff;
3125 initialize(&APFloat::IEEEsingle);
3126 assert(partCount()==1);
3129 if (myexponent==0 && mysignificand==0) {
3130 // exponent, significand meaningless
3132 } else if (myexponent==0xff && mysignificand==0) {
3133 // exponent, significand meaningless
3134 category = fcInfinity;
3135 } else if (myexponent==0xff && mysignificand!=0) {
3136 // sign, exponent, significand meaningless
3138 *significandParts() = mysignificand;
3140 category = fcNormal;
3141 exponent = myexponent - 127; //bias
3142 *significandParts() = mysignificand;
3143 if (myexponent==0) // denormal
3146 *significandParts() |= 0x800000; // integer bit
3151 APFloat::initFromHalfAPInt(const APInt & api)
3153 assert(api.getBitWidth()==16);
3154 uint32_t i = (uint32_t)*api.getRawData();
3155 uint32_t myexponent = (i >> 10) & 0x1f;
3156 uint32_t mysignificand = i & 0x3ff;
3158 initialize(&APFloat::IEEEhalf);
3159 assert(partCount()==1);
3162 if (myexponent==0 && mysignificand==0) {
3163 // exponent, significand meaningless
3165 } else if (myexponent==0x1f && mysignificand==0) {
3166 // exponent, significand meaningless
3167 category = fcInfinity;
3168 } else if (myexponent==0x1f && mysignificand!=0) {
3169 // sign, exponent, significand meaningless
3171 *significandParts() = mysignificand;
3173 category = fcNormal;
3174 exponent = myexponent - 15; //bias
3175 *significandParts() = mysignificand;
3176 if (myexponent==0) // denormal
3179 *significandParts() |= 0x400; // integer bit
3183 /// Treat api as containing the bits of a floating point number. Currently
3184 /// we infer the floating point type from the size of the APInt. The
3185 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3186 /// when the size is anything else).
3188 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3190 if (api.getBitWidth() == 16)
3191 return initFromHalfAPInt(api);
3192 else if (api.getBitWidth() == 32)
3193 return initFromFloatAPInt(api);
3194 else if (api.getBitWidth()==64)
3195 return initFromDoubleAPInt(api);
3196 else if (api.getBitWidth()==80)
3197 return initFromF80LongDoubleAPInt(api);
3198 else if (api.getBitWidth()==128)
3200 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3202 llvm_unreachable(0);
3206 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3208 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
3211 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3212 APFloat Val(Sem, fcNormal, Negative);
3214 // We want (in interchange format):
3215 // sign = {Negative}
3217 // significand = 1..1
3219 Val.exponent = Sem.maxExponent; // unbiased
3221 // 1-initialize all bits....
3222 Val.zeroSignificand();
3223 integerPart *significand = Val.significandParts();
3224 unsigned N = partCountForBits(Sem.precision);
3225 for (unsigned i = 0; i != N; ++i)
3226 significand[i] = ~((integerPart) 0);
3228 // ...and then clear the top bits for internal consistency.
3229 if (Sem.precision % integerPartWidth != 0)
3231 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3236 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3237 APFloat Val(Sem, fcNormal, Negative);
3239 // We want (in interchange format):
3240 // sign = {Negative}
3242 // significand = 0..01
3244 Val.exponent = Sem.minExponent; // unbiased
3245 Val.zeroSignificand();
3246 Val.significandParts()[0] = 1;
3250 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3251 APFloat Val(Sem, fcNormal, Negative);
3253 // We want (in interchange format):
3254 // sign = {Negative}
3256 // significand = 10..0
3258 Val.exponent = Sem.minExponent;
3259 Val.zeroSignificand();
3260 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3261 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3266 APFloat::APFloat(const APInt& api, bool isIEEE) {
3267 initFromAPInt(api, isIEEE);
3270 APFloat::APFloat(float f) {
3271 initFromAPInt(APInt::floatToBits(f));
3274 APFloat::APFloat(double d) {
3275 initFromAPInt(APInt::doubleToBits(d));
3279 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3280 Buffer.append(Str.begin(), Str.end());
3283 /// Removes data from the given significand until it is no more
3284 /// precise than is required for the desired precision.
3285 void AdjustToPrecision(APInt &significand,
3286 int &exp, unsigned FormatPrecision) {
3287 unsigned bits = significand.getActiveBits();
3289 // 196/59 is a very slight overestimate of lg_2(10).
3290 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3292 if (bits <= bitsRequired) return;
3294 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3295 if (!tensRemovable) return;
3297 exp += tensRemovable;
3299 APInt divisor(significand.getBitWidth(), 1);
3300 APInt powten(significand.getBitWidth(), 10);
3302 if (tensRemovable & 1)
3304 tensRemovable >>= 1;
3305 if (!tensRemovable) break;
3309 significand = significand.udiv(divisor);
3311 // Truncate the significand down to its active bit count, but
3312 // don't try to drop below 32.
3313 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3314 significand = significand.trunc(newPrecision);
3318 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3319 int &exp, unsigned FormatPrecision) {
3320 unsigned N = buffer.size();
3321 if (N <= FormatPrecision) return;
3323 // The most significant figures are the last ones in the buffer.
3324 unsigned FirstSignificant = N - FormatPrecision;
3327 // FIXME: this probably shouldn't use 'round half up'.
3329 // Rounding down is just a truncation, except we also want to drop
3330 // trailing zeros from the new result.
3331 if (buffer[FirstSignificant - 1] < '5') {
3332 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3335 exp += FirstSignificant;
3336 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3340 // Rounding up requires a decimal add-with-carry. If we continue
3341 // the carry, the newly-introduced zeros will just be truncated.
3342 for (unsigned I = FirstSignificant; I != N; ++I) {
3343 if (buffer[I] == '9') {
3351 // If we carried through, we have exactly one digit of precision.
3352 if (FirstSignificant == N) {
3353 exp += FirstSignificant;
3355 buffer.push_back('1');
3359 exp += FirstSignificant;
3360 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3364 void APFloat::toString(SmallVectorImpl<char> &Str,
3365 unsigned FormatPrecision,
3366 unsigned FormatMaxPadding) const {
3370 return append(Str, "-Inf");
3372 return append(Str, "+Inf");
3374 case fcNaN: return append(Str, "NaN");
3380 if (!FormatMaxPadding)
3381 append(Str, "0.0E+0");
3393 // Decompose the number into an APInt and an exponent.
3394 int exp = exponent - ((int) semantics->precision - 1);
3395 APInt significand(semantics->precision,
3396 makeArrayRef(significandParts(),
3397 partCountForBits(semantics->precision)));
3399 // Set FormatPrecision if zero. We want to do this before we
3400 // truncate trailing zeros, as those are part of the precision.
3401 if (!FormatPrecision) {
3402 // It's an interesting question whether to use the nominal
3403 // precision or the active precision here for denormals.
3405 // FormatPrecision = ceil(significandBits / lg_2(10))
3406 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3409 // Ignore trailing binary zeros.
3410 int trailingZeros = significand.countTrailingZeros();
3411 exp += trailingZeros;
3412 significand = significand.lshr(trailingZeros);
3414 // Change the exponent from 2^e to 10^e.
3417 } else if (exp > 0) {
3419 significand = significand.zext(semantics->precision + exp);
3420 significand <<= exp;
3422 } else { /* exp < 0 */
3425 // We transform this using the identity:
3426 // (N)(2^-e) == (N)(5^e)(10^-e)
3427 // This means we have to multiply N (the significand) by 5^e.
3428 // To avoid overflow, we have to operate on numbers large
3429 // enough to store N * 5^e:
3430 // log2(N * 5^e) == log2(N) + e * log2(5)
3431 // <= semantics->precision + e * 137 / 59
3432 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3434 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3436 // Multiply significand by 5^e.
3437 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3438 significand = significand.zext(precision);
3439 APInt five_to_the_i(precision, 5);
3441 if (texp & 1) significand *= five_to_the_i;
3445 five_to_the_i *= five_to_the_i;
3449 AdjustToPrecision(significand, exp, FormatPrecision);
3451 SmallVector<char, 256> buffer;
3454 unsigned precision = significand.getBitWidth();
3455 APInt ten(precision, 10);
3456 APInt digit(precision, 0);
3458 bool inTrail = true;
3459 while (significand != 0) {
3460 // digit <- significand % 10
3461 // significand <- significand / 10
3462 APInt::udivrem(significand, ten, significand, digit);
3464 unsigned d = digit.getZExtValue();
3466 // Drop trailing zeros.
3467 if (inTrail && !d) exp++;
3469 buffer.push_back((char) ('0' + d));
3474 assert(!buffer.empty() && "no characters in buffer!");
3476 // Drop down to FormatPrecision.
3477 // TODO: don't do more precise calculations above than are required.
3478 AdjustToPrecision(buffer, exp, FormatPrecision);
3480 unsigned NDigits = buffer.size();
3482 // Check whether we should use scientific notation.
3483 bool FormatScientific;
3484 if (!FormatMaxPadding)
3485 FormatScientific = true;
3490 // But we shouldn't make the number look more precise than it is.
3491 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3492 NDigits + (unsigned) exp > FormatPrecision);
3494 // Power of the most significant digit.
3495 int MSD = exp + (int) (NDigits - 1);
3498 FormatScientific = false;
3500 // 765e-5 == 0.00765
3502 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3507 // Scientific formatting is pretty straightforward.
3508 if (FormatScientific) {
3509 exp += (NDigits - 1);
3511 Str.push_back(buffer[NDigits-1]);
3516 for (unsigned I = 1; I != NDigits; ++I)
3517 Str.push_back(buffer[NDigits-1-I]);
3520 Str.push_back(exp >= 0 ? '+' : '-');
3521 if (exp < 0) exp = -exp;
3522 SmallVector<char, 6> expbuf;
3524 expbuf.push_back((char) ('0' + (exp % 10)));
3527 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3528 Str.push_back(expbuf[E-1-I]);
3532 // Non-scientific, positive exponents.
3534 for (unsigned I = 0; I != NDigits; ++I)
3535 Str.push_back(buffer[NDigits-1-I]);
3536 for (unsigned I = 0; I != (unsigned) exp; ++I)
3541 // Non-scientific, negative exponents.
3543 // The number of digits to the left of the decimal point.
3544 int NWholeDigits = exp + (int) NDigits;
3547 if (NWholeDigits > 0) {
3548 for (; I != (unsigned) NWholeDigits; ++I)
3549 Str.push_back(buffer[NDigits-I-1]);
3552 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3556 for (unsigned Z = 1; Z != NZeros; ++Z)
3560 for (; I != NDigits; ++I)
3561 Str.push_back(buffer[NDigits-I-1]);
3564 bool APFloat::getExactInverse(APFloat *inv) const {
3565 // Special floats and denormals have no exact inverse.
3566 if (category != fcNormal)
3569 // Check that the number is a power of two by making sure that only the
3570 // integer bit is set in the significand.
3571 if (significandLSB() != semantics->precision - 1)
3575 APFloat reciprocal(*semantics, 1ULL);
3576 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3579 // Avoid multiplication with a denormal, it is not safe on all platforms and
3580 // may be slower than a normal division.
3581 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3584 assert(reciprocal.category == fcNormal &&
3585 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);