1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MathExtras.h"
27 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29 /* Assumed in hexadecimal significand parsing, and conversion to
30 hexadecimal strings. */
31 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
32 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
36 /* Represents floating point arithmetic semantics. */
38 /* The largest E such that 2^E is representable; this matches the
39 definition of IEEE 754. */
40 exponent_t maxExponent;
42 /* The smallest E such that 2^E is a normalized number; this
43 matches the definition of IEEE 754. */
44 exponent_t minExponent;
46 /* Number of bits in the significand. This includes the integer
48 unsigned int precision;
50 /* True if arithmetic is supported. */
51 unsigned int arithmeticOK;
54 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
55 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
56 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
57 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
58 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
59 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
61 // The PowerPC format consists of two doubles. It does not map cleanly
62 // onto the usual format above. For now only storage of constants of
63 // this type is supported, no arithmetic.
64 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
66 /* A tight upper bound on number of parts required to hold the value
69 power * 815 / (351 * integerPartWidth) + 1
71 However, whilst the result may require only this many parts,
72 because we are multiplying two values to get it, the
73 multiplication may require an extra part with the excess part
74 being zero (consider the trivial case of 1 * 1, tcFullMultiply
75 requires two parts to hold the single-part result). So we add an
76 extra one to guarantee enough space whilst multiplying. */
77 const unsigned int maxExponent = 16383;
78 const unsigned int maxPrecision = 113;
79 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
80 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
81 / (351 * integerPartWidth));
84 /* A bunch of private, handy routines. */
86 static inline unsigned int
87 partCountForBits(unsigned int bits)
89 return ((bits) + integerPartWidth - 1) / integerPartWidth;
92 /* Returns 0U-9U. Return values >= 10U are not digits. */
93 static inline unsigned int
94 decDigitValue(unsigned int c)
100 hexDigitValue(unsigned int c)
120 assertArithmeticOK(const llvm::fltSemantics &semantics) {
121 assert(semantics.arithmeticOK &&
122 "Compile-time arithmetic does not support these semantics");
125 /* Return the value of a decimal exponent of the form
128 If the exponent overflows, returns a large exponent with the
131 readExponent(StringRef::iterator begin, StringRef::iterator end)
134 unsigned int absExponent;
135 const unsigned int overlargeExponent = 24000; /* FIXME. */
136 StringRef::iterator p = begin;
138 assert(p != end && "Exponent has no digits");
140 isNegative = (*p == '-');
141 if (*p == '-' || *p == '+') {
143 assert(p != end && "Exponent has no digits");
146 absExponent = decDigitValue(*p++);
147 assert(absExponent < 10U && "Invalid character in exponent");
149 for (; p != end; ++p) {
152 value = decDigitValue(*p);
153 assert(value < 10U && "Invalid character in exponent");
155 value += absExponent * 10;
156 if (absExponent >= overlargeExponent) {
157 absExponent = overlargeExponent;
158 p = end; /* outwit assert below */
164 assert(p == end && "Invalid exponent in exponent");
167 return -(int) absExponent;
169 return (int) absExponent;
172 /* This is ugly and needs cleaning up, but I don't immediately see
173 how whilst remaining safe. */
175 totalExponent(StringRef::iterator p, StringRef::iterator end,
176 int exponentAdjustment)
178 int unsignedExponent;
179 bool negative, overflow;
182 assert(p != end && "Exponent has no digits");
184 negative = *p == '-';
185 if (*p == '-' || *p == '+') {
187 assert(p != end && "Exponent has no digits");
190 unsignedExponent = 0;
192 for (; p != end; ++p) {
195 value = decDigitValue(*p);
196 assert(value < 10U && "Invalid character in exponent");
198 unsignedExponent = unsignedExponent * 10 + value;
199 if (unsignedExponent > 32767)
203 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
207 exponent = unsignedExponent;
209 exponent = -exponent;
210 exponent += exponentAdjustment;
211 if (exponent > 32767 || exponent < -32768)
216 exponent = negative ? -32768: 32767;
221 static StringRef::iterator
222 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
223 StringRef::iterator *dot)
225 StringRef::iterator p = begin;
227 while (*p == '0' && p != end)
233 assert(end - begin != 1 && "Significand has no digits");
235 while (*p == '0' && p != end)
242 /* Given a normal decimal floating point number of the form
246 where the decimal point and exponent are optional, fill out the
247 structure D. Exponent is appropriate if the significand is
248 treated as an integer, and normalizedExponent if the significand
249 is taken to have the decimal point after a single leading
252 If the value is zero, V->firstSigDigit points to a non-digit, and
253 the return exponent is zero.
256 const char *firstSigDigit;
257 const char *lastSigDigit;
259 int normalizedExponent;
263 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
266 StringRef::iterator dot = end;
267 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
269 D->firstSigDigit = p;
271 D->normalizedExponent = 0;
273 for (; p != end; ++p) {
275 assert(dot == end && "String contains multiple dots");
280 if (decDigitValue(*p) >= 10U)
285 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
286 assert(p != begin && "Significand has no digits");
287 assert((dot == end || p - begin != 1) && "Significand has no digits");
289 /* p points to the first non-digit in the string */
290 D->exponent = readExponent(p + 1, end);
292 /* Implied decimal point? */
297 /* If number is all zeroes accept any exponent. */
298 if (p != D->firstSigDigit) {
299 /* Drop insignificant trailing zeroes. */
304 while (p != begin && *p == '0');
305 while (p != begin && *p == '.');
308 /* Adjust the exponents for any decimal point. */
309 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
310 D->normalizedExponent = (D->exponent +
311 static_cast<exponent_t>((p - D->firstSigDigit)
312 - (dot > D->firstSigDigit && dot < p)));
318 /* Return the trailing fraction of a hexadecimal number.
319 DIGITVALUE is the first hex digit of the fraction, P points to
322 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
323 unsigned int digitValue)
325 unsigned int hexDigit;
327 /* If the first trailing digit isn't 0 or 8 we can work out the
328 fraction immediately. */
330 return lfMoreThanHalf;
331 else if (digitValue < 8 && digitValue > 0)
332 return lfLessThanHalf;
334 /* Otherwise we need to find the first non-zero digit. */
338 assert(p != end && "Invalid trailing hexadecimal fraction!");
340 hexDigit = hexDigitValue(*p);
342 /* If we ran off the end it is exactly zero or one-half, otherwise
345 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
347 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
350 /* Return the fraction lost were a bignum truncated losing the least
351 significant BITS bits. */
353 lostFractionThroughTruncation(const integerPart *parts,
354 unsigned int partCount,
359 lsb = APInt::tcLSB(parts, partCount);
361 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
363 return lfExactlyZero;
365 return lfExactlyHalf;
366 if (bits <= partCount * integerPartWidth &&
367 APInt::tcExtractBit(parts, bits - 1))
368 return lfMoreThanHalf;
370 return lfLessThanHalf;
373 /* Shift DST right BITS bits noting lost fraction. */
375 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
377 lostFraction lost_fraction;
379 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
381 APInt::tcShiftRight(dst, parts, bits);
383 return lost_fraction;
386 /* Combine the effect of two lost fractions. */
388 combineLostFractions(lostFraction moreSignificant,
389 lostFraction lessSignificant)
391 if (lessSignificant != lfExactlyZero) {
392 if (moreSignificant == lfExactlyZero)
393 moreSignificant = lfLessThanHalf;
394 else if (moreSignificant == lfExactlyHalf)
395 moreSignificant = lfMoreThanHalf;
398 return moreSignificant;
401 /* The error from the true value, in half-ulps, on multiplying two
402 floating point numbers, which differ from the value they
403 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
404 than the returned value.
406 See "How to Read Floating Point Numbers Accurately" by William D
409 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
411 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
413 if (HUerr1 + HUerr2 == 0)
414 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
416 return inexactMultiply + 2 * (HUerr1 + HUerr2);
419 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
420 when the least significant BITS are truncated. BITS cannot be
423 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
425 unsigned int count, partBits;
426 integerPart part, boundary;
431 count = bits / integerPartWidth;
432 partBits = bits % integerPartWidth + 1;
434 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
437 boundary = (integerPart) 1 << (partBits - 1);
442 if (part - boundary <= boundary - part)
443 return part - boundary;
445 return boundary - part;
448 if (part == boundary) {
451 return ~(integerPart) 0; /* A lot. */
454 } else if (part == boundary - 1) {
457 return ~(integerPart) 0; /* A lot. */
462 return ~(integerPart) 0; /* A lot. */
465 /* Place pow(5, power) in DST, and return the number of parts used.
466 DST must be at least one part larger than size of the answer. */
468 powerOf5(integerPart *dst, unsigned int power)
470 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
472 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
473 pow5s[0] = 78125 * 5;
475 unsigned int partsCount[16] = { 1 };
476 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
478 assert(power <= maxExponent);
483 *p1 = firstEightPowers[power & 7];
489 for (unsigned int n = 0; power; power >>= 1, n++) {
494 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
496 pc = partsCount[n - 1];
497 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
499 if (pow5[pc - 1] == 0)
507 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
509 if (p2[result - 1] == 0)
512 /* Now result is in p1 with partsCount parts and p2 is scratch
514 tmp = p1, p1 = p2, p2 = tmp;
521 APInt::tcAssign(dst, p1, result);
526 /* Zero at the end to avoid modular arithmetic when adding one; used
527 when rounding up during hexadecimal output. */
528 static const char hexDigitsLower[] = "0123456789abcdef0";
529 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
530 static const char infinityL[] = "infinity";
531 static const char infinityU[] = "INFINITY";
532 static const char NaNL[] = "nan";
533 static const char NaNU[] = "NAN";
535 /* Write out an integerPart in hexadecimal, starting with the most
536 significant nibble. Write out exactly COUNT hexdigits, return
539 partAsHex (char *dst, integerPart part, unsigned int count,
540 const char *hexDigitChars)
542 unsigned int result = count;
544 assert(count != 0 && count <= integerPartWidth / 4);
546 part >>= (integerPartWidth - 4 * count);
548 dst[count] = hexDigitChars[part & 0xf];
555 /* Write out an unsigned decimal integer. */
557 writeUnsignedDecimal (char *dst, unsigned int n)
573 /* Write out a signed decimal integer. */
575 writeSignedDecimal (char *dst, int value)
579 dst = writeUnsignedDecimal(dst, -(unsigned) value);
581 dst = writeUnsignedDecimal(dst, value);
588 APFloat::initialize(const fltSemantics *ourSemantics)
592 semantics = ourSemantics;
595 significand.parts = new integerPart[count];
599 APFloat::freeSignificand()
602 delete [] significand.parts;
606 APFloat::assign(const APFloat &rhs)
608 assert(semantics == rhs.semantics);
611 category = rhs.category;
612 exponent = rhs.exponent;
614 exponent2 = rhs.exponent2;
615 if (category == fcNormal || category == fcNaN)
616 copySignificand(rhs);
620 APFloat::copySignificand(const APFloat &rhs)
622 assert(category == fcNormal || category == fcNaN);
623 assert(rhs.partCount() >= partCount());
625 APInt::tcAssign(significandParts(), rhs.significandParts(),
629 /* Make this number a NaN, with an arbitrary but deterministic value
630 for the significand. If double or longer, this is a signalling NaN,
631 which may not be ideal. If float, this is QNaN(0). */
632 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
637 integerPart *significand = significandParts();
638 unsigned numParts = partCount();
640 // Set the significand bits to the fill.
641 if (!fill || fill->getNumWords() < numParts)
642 APInt::tcSet(significand, 0, numParts);
644 APInt::tcAssign(significand, fill->getRawData(),
645 std::min(fill->getNumWords(), numParts));
647 // Zero out the excess bits of the significand.
648 unsigned bitsToPreserve = semantics->precision - 1;
649 unsigned part = bitsToPreserve / 64;
650 bitsToPreserve %= 64;
651 significand[part] &= ((1ULL << bitsToPreserve) - 1);
652 for (part++; part != numParts; ++part)
653 significand[part] = 0;
656 unsigned QNaNBit = semantics->precision - 2;
659 // We always have to clear the QNaN bit to make it an SNaN.
660 APInt::tcClearBit(significand, QNaNBit);
662 // If there are no bits set in the payload, we have to set
663 // *something* to make it a NaN instead of an infinity;
664 // conventionally, this is the next bit down from the QNaN bit.
665 if (APInt::tcIsZero(significand, numParts))
666 APInt::tcSetBit(significand, QNaNBit - 1);
668 // We always have to set the QNaN bit to make it a QNaN.
669 APInt::tcSetBit(significand, QNaNBit);
672 // For x87 extended precision, we want to make a NaN, not a
673 // pseudo-NaN. Maybe we should expose the ability to make
675 if (semantics == &APFloat::x87DoubleExtended)
676 APInt::tcSetBit(significand, QNaNBit + 1);
679 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
681 APFloat value(Sem, uninitialized);
682 value.makeNaN(SNaN, Negative, fill);
687 APFloat::operator=(const APFloat &rhs)
690 if (semantics != rhs.semantics) {
692 initialize(rhs.semantics);
701 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
704 if (semantics != rhs.semantics ||
705 category != rhs.category ||
708 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
711 if (category==fcZero || category==fcInfinity)
713 else if (category==fcNormal && exponent!=rhs.exponent)
715 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
716 exponent2!=rhs.exponent2)
720 const integerPart* p=significandParts();
721 const integerPart* q=rhs.significandParts();
722 for (; i>0; i--, p++, q++) {
730 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
731 : exponent2(0), sign2(0) {
732 assertArithmeticOK(ourSemantics);
733 initialize(&ourSemantics);
736 exponent = ourSemantics.precision - 1;
737 significandParts()[0] = value;
738 normalize(rmNearestTiesToEven, lfExactlyZero);
741 APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
742 assertArithmeticOK(ourSemantics);
743 initialize(&ourSemantics);
748 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
749 : exponent2(0), sign2(0) {
750 assertArithmeticOK(ourSemantics);
751 // Allocates storage if necessary but does not initialize it.
752 initialize(&ourSemantics);
755 APFloat::APFloat(const fltSemantics &ourSemantics,
756 fltCategory ourCategory, bool negative)
757 : exponent2(0), sign2(0) {
758 assertArithmeticOK(ourSemantics);
759 initialize(&ourSemantics);
760 category = ourCategory;
762 if (category == fcNormal)
764 else if (ourCategory == fcNaN)
768 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
769 : exponent2(0), sign2(0) {
770 assertArithmeticOK(ourSemantics);
771 initialize(&ourSemantics);
772 convertFromString(text, rmNearestTiesToEven);
775 APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
776 initialize(rhs.semantics);
785 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
786 void APFloat::Profile(FoldingSetNodeID& ID) const {
787 ID.Add(bitcastToAPInt());
791 APFloat::partCount() const
793 return partCountForBits(semantics->precision + 1);
797 APFloat::semanticsPrecision(const fltSemantics &semantics)
799 return semantics.precision;
803 APFloat::significandParts() const
805 return const_cast<APFloat *>(this)->significandParts();
809 APFloat::significandParts()
811 assert(category == fcNormal || category == fcNaN);
814 return significand.parts;
816 return &significand.part;
820 APFloat::zeroSignificand()
823 APInt::tcSet(significandParts(), 0, partCount());
826 /* Increment an fcNormal floating point number's significand. */
828 APFloat::incrementSignificand()
832 carry = APInt::tcIncrement(significandParts(), partCount());
834 /* Our callers should never cause us to overflow. */
839 /* Add the significand of the RHS. Returns the carry flag. */
841 APFloat::addSignificand(const APFloat &rhs)
845 parts = significandParts();
847 assert(semantics == rhs.semantics);
848 assert(exponent == rhs.exponent);
850 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
853 /* Subtract the significand of the RHS with a borrow flag. Returns
856 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
860 parts = significandParts();
862 assert(semantics == rhs.semantics);
863 assert(exponent == rhs.exponent);
865 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
869 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
870 on to the full-precision result of the multiplication. Returns the
873 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
875 unsigned int omsb; // One, not zero, based MSB.
876 unsigned int partsCount, newPartsCount, precision;
877 integerPart *lhsSignificand;
878 integerPart scratch[4];
879 integerPart *fullSignificand;
880 lostFraction lost_fraction;
883 assert(semantics == rhs.semantics);
885 precision = semantics->precision;
886 newPartsCount = partCountForBits(precision * 2);
888 if (newPartsCount > 4)
889 fullSignificand = new integerPart[newPartsCount];
891 fullSignificand = scratch;
893 lhsSignificand = significandParts();
894 partsCount = partCount();
896 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
897 rhs.significandParts(), partsCount, partsCount);
899 lost_fraction = lfExactlyZero;
900 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
901 exponent += rhs.exponent;
904 Significand savedSignificand = significand;
905 const fltSemantics *savedSemantics = semantics;
906 fltSemantics extendedSemantics;
908 unsigned int extendedPrecision;
910 /* Normalize our MSB. */
911 extendedPrecision = precision + precision - 1;
912 if (omsb != extendedPrecision) {
913 APInt::tcShiftLeft(fullSignificand, newPartsCount,
914 extendedPrecision - omsb);
915 exponent -= extendedPrecision - omsb;
918 /* Create new semantics. */
919 extendedSemantics = *semantics;
920 extendedSemantics.precision = extendedPrecision;
922 if (newPartsCount == 1)
923 significand.part = fullSignificand[0];
925 significand.parts = fullSignificand;
926 semantics = &extendedSemantics;
928 APFloat extendedAddend(*addend);
929 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
930 assert(status == opOK);
932 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
934 /* Restore our state. */
935 if (newPartsCount == 1)
936 fullSignificand[0] = significand.part;
937 significand = savedSignificand;
938 semantics = savedSemantics;
940 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
943 exponent -= (precision - 1);
945 if (omsb > precision) {
946 unsigned int bits, significantParts;
949 bits = omsb - precision;
950 significantParts = partCountForBits(omsb);
951 lf = shiftRight(fullSignificand, significantParts, bits);
952 lost_fraction = combineLostFractions(lf, lost_fraction);
956 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
958 if (newPartsCount > 4)
959 delete [] fullSignificand;
961 return lost_fraction;
964 /* Multiply the significands of LHS and RHS to DST. */
966 APFloat::divideSignificand(const APFloat &rhs)
968 unsigned int bit, i, partsCount;
969 const integerPart *rhsSignificand;
970 integerPart *lhsSignificand, *dividend, *divisor;
971 integerPart scratch[4];
972 lostFraction lost_fraction;
974 assert(semantics == rhs.semantics);
976 lhsSignificand = significandParts();
977 rhsSignificand = rhs.significandParts();
978 partsCount = partCount();
981 dividend = new integerPart[partsCount * 2];
985 divisor = dividend + partsCount;
987 /* Copy the dividend and divisor as they will be modified in-place. */
988 for (i = 0; i < partsCount; i++) {
989 dividend[i] = lhsSignificand[i];
990 divisor[i] = rhsSignificand[i];
991 lhsSignificand[i] = 0;
994 exponent -= rhs.exponent;
996 unsigned int precision = semantics->precision;
998 /* Normalize the divisor. */
999 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1002 APInt::tcShiftLeft(divisor, partsCount, bit);
1005 /* Normalize the dividend. */
1006 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1009 APInt::tcShiftLeft(dividend, partsCount, bit);
1012 /* Ensure the dividend >= divisor initially for the loop below.
1013 Incidentally, this means that the division loop below is
1014 guaranteed to set the integer bit to one. */
1015 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1017 APInt::tcShiftLeft(dividend, partsCount, 1);
1018 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1021 /* Long division. */
1022 for (bit = precision; bit; bit -= 1) {
1023 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1024 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1025 APInt::tcSetBit(lhsSignificand, bit - 1);
1028 APInt::tcShiftLeft(dividend, partsCount, 1);
1031 /* Figure out the lost fraction. */
1032 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1035 lost_fraction = lfMoreThanHalf;
1037 lost_fraction = lfExactlyHalf;
1038 else if (APInt::tcIsZero(dividend, partsCount))
1039 lost_fraction = lfExactlyZero;
1041 lost_fraction = lfLessThanHalf;
1046 return lost_fraction;
1050 APFloat::significandMSB() const
1052 return APInt::tcMSB(significandParts(), partCount());
1056 APFloat::significandLSB() const
1058 return APInt::tcLSB(significandParts(), partCount());
1061 /* Note that a zero result is NOT normalized to fcZero. */
1063 APFloat::shiftSignificandRight(unsigned int bits)
1065 /* Our exponent should not overflow. */
1066 assert((exponent_t) (exponent + bits) >= exponent);
1070 return shiftRight(significandParts(), partCount(), bits);
1073 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1075 APFloat::shiftSignificandLeft(unsigned int bits)
1077 assert(bits < semantics->precision);
1080 unsigned int partsCount = partCount();
1082 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1085 assert(!APInt::tcIsZero(significandParts(), partsCount));
1090 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1094 assert(semantics == rhs.semantics);
1095 assert(category == fcNormal);
1096 assert(rhs.category == fcNormal);
1098 compare = exponent - rhs.exponent;
1100 /* If exponents are equal, do an unsigned bignum comparison of the
1103 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1107 return cmpGreaterThan;
1108 else if (compare < 0)
1114 /* Handle overflow. Sign is preserved. We either become infinity or
1115 the largest finite number. */
1117 APFloat::handleOverflow(roundingMode rounding_mode)
1120 if (rounding_mode == rmNearestTiesToEven ||
1121 rounding_mode == rmNearestTiesToAway ||
1122 (rounding_mode == rmTowardPositive && !sign) ||
1123 (rounding_mode == rmTowardNegative && sign)) {
1124 category = fcInfinity;
1125 return (opStatus) (opOverflow | opInexact);
1128 /* Otherwise we become the largest finite number. */
1129 category = fcNormal;
1130 exponent = semantics->maxExponent;
1131 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1132 semantics->precision);
1137 /* Returns TRUE if, when truncating the current number, with BIT the
1138 new LSB, with the given lost fraction and rounding mode, the result
1139 would need to be rounded away from zero (i.e., by increasing the
1140 signficand). This routine must work for fcZero of both signs, and
1141 fcNormal numbers. */
1143 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1144 lostFraction lost_fraction,
1145 unsigned int bit) const
1147 /* NaNs and infinities should not have lost fractions. */
1148 assert(category == fcNormal || category == fcZero);
1150 /* Current callers never pass this so we don't handle it. */
1151 assert(lost_fraction != lfExactlyZero);
1153 switch (rounding_mode) {
1154 case rmNearestTiesToAway:
1155 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1157 case rmNearestTiesToEven:
1158 if (lost_fraction == lfMoreThanHalf)
1161 /* Our zeroes don't have a significand to test. */
1162 if (lost_fraction == lfExactlyHalf && category != fcZero)
1163 return APInt::tcExtractBit(significandParts(), bit);
1170 case rmTowardPositive:
1171 return sign == false;
1173 case rmTowardNegative:
1174 return sign == true;
1176 llvm_unreachable("Invalid rounding mode found");
1180 APFloat::normalize(roundingMode rounding_mode,
1181 lostFraction lost_fraction)
1183 unsigned int omsb; /* One, not zero, based MSB. */
1186 if (category != fcNormal)
1189 /* Before rounding normalize the exponent of fcNormal numbers. */
1190 omsb = significandMSB() + 1;
1193 /* OMSB is numbered from 1. We want to place it in the integer
1194 bit numbered PRECISION if possible, with a compensating change in
1196 exponentChange = omsb - semantics->precision;
1198 /* If the resulting exponent is too high, overflow according to
1199 the rounding mode. */
1200 if (exponent + exponentChange > semantics->maxExponent)
1201 return handleOverflow(rounding_mode);
1203 /* Subnormal numbers have exponent minExponent, and their MSB
1204 is forced based on that. */
1205 if (exponent + exponentChange < semantics->minExponent)
1206 exponentChange = semantics->minExponent - exponent;
1208 /* Shifting left is easy as we don't lose precision. */
1209 if (exponentChange < 0) {
1210 assert(lost_fraction == lfExactlyZero);
1212 shiftSignificandLeft(-exponentChange);
1217 if (exponentChange > 0) {
1220 /* Shift right and capture any new lost fraction. */
1221 lf = shiftSignificandRight(exponentChange);
1223 lost_fraction = combineLostFractions(lf, lost_fraction);
1225 /* Keep OMSB up-to-date. */
1226 if (omsb > (unsigned) exponentChange)
1227 omsb -= exponentChange;
1233 /* Now round the number according to rounding_mode given the lost
1236 /* As specified in IEEE 754, since we do not trap we do not report
1237 underflow for exact results. */
1238 if (lost_fraction == lfExactlyZero) {
1239 /* Canonicalize zeroes. */
1246 /* Increment the significand if we're rounding away from zero. */
1247 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1249 exponent = semantics->minExponent;
1251 incrementSignificand();
1252 omsb = significandMSB() + 1;
1254 /* Did the significand increment overflow? */
1255 if (omsb == (unsigned) semantics->precision + 1) {
1256 /* Renormalize by incrementing the exponent and shifting our
1257 significand right one. However if we already have the
1258 maximum exponent we overflow to infinity. */
1259 if (exponent == semantics->maxExponent) {
1260 category = fcInfinity;
1262 return (opStatus) (opOverflow | opInexact);
1265 shiftSignificandRight(1);
1271 /* The normal case - we were and are not denormal, and any
1272 significand increment above didn't overflow. */
1273 if (omsb == semantics->precision)
1276 /* We have a non-zero denormal. */
1277 assert(omsb < semantics->precision);
1279 /* Canonicalize zeroes. */
1283 /* The fcZero case is a denormal that underflowed to zero. */
1284 return (opStatus) (opUnderflow | opInexact);
1288 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1290 switch (convolve(category, rhs.category)) {
1292 llvm_unreachable(0);
1294 case convolve(fcNaN, fcZero):
1295 case convolve(fcNaN, fcNormal):
1296 case convolve(fcNaN, fcInfinity):
1297 case convolve(fcNaN, fcNaN):
1298 case convolve(fcNormal, fcZero):
1299 case convolve(fcInfinity, fcNormal):
1300 case convolve(fcInfinity, fcZero):
1303 case convolve(fcZero, fcNaN):
1304 case convolve(fcNormal, fcNaN):
1305 case convolve(fcInfinity, fcNaN):
1307 copySignificand(rhs);
1310 case convolve(fcNormal, fcInfinity):
1311 case convolve(fcZero, fcInfinity):
1312 category = fcInfinity;
1313 sign = rhs.sign ^ subtract;
1316 case convolve(fcZero, fcNormal):
1318 sign = rhs.sign ^ subtract;
1321 case convolve(fcZero, fcZero):
1322 /* Sign depends on rounding mode; handled by caller. */
1325 case convolve(fcInfinity, fcInfinity):
1326 /* Differently signed infinities can only be validly
1328 if (((sign ^ rhs.sign)!=0) != subtract) {
1335 case convolve(fcNormal, fcNormal):
1340 /* Add or subtract two normal numbers. */
1342 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1345 lostFraction lost_fraction;
1348 /* Determine if the operation on the absolute values is effectively
1349 an addition or subtraction. */
1350 subtract ^= (sign ^ rhs.sign) ? true : false;
1352 /* Are we bigger exponent-wise than the RHS? */
1353 bits = exponent - rhs.exponent;
1355 /* Subtraction is more subtle than one might naively expect. */
1357 APFloat temp_rhs(rhs);
1361 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1362 lost_fraction = lfExactlyZero;
1363 } else if (bits > 0) {
1364 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1365 shiftSignificandLeft(1);
1368 lost_fraction = shiftSignificandRight(-bits - 1);
1369 temp_rhs.shiftSignificandLeft(1);
1374 carry = temp_rhs.subtractSignificand
1375 (*this, lost_fraction != lfExactlyZero);
1376 copySignificand(temp_rhs);
1379 carry = subtractSignificand
1380 (temp_rhs, lost_fraction != lfExactlyZero);
1383 /* Invert the lost fraction - it was on the RHS and
1385 if (lost_fraction == lfLessThanHalf)
1386 lost_fraction = lfMoreThanHalf;
1387 else if (lost_fraction == lfMoreThanHalf)
1388 lost_fraction = lfLessThanHalf;
1390 /* The code above is intended to ensure that no borrow is
1396 APFloat temp_rhs(rhs);
1398 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1399 carry = addSignificand(temp_rhs);
1401 lost_fraction = shiftSignificandRight(-bits);
1402 carry = addSignificand(rhs);
1405 /* We have a guard bit; generating a carry cannot happen. */
1410 return lost_fraction;
1414 APFloat::multiplySpecials(const APFloat &rhs)
1416 switch (convolve(category, rhs.category)) {
1418 llvm_unreachable(0);
1420 case convolve(fcNaN, fcZero):
1421 case convolve(fcNaN, fcNormal):
1422 case convolve(fcNaN, fcInfinity):
1423 case convolve(fcNaN, fcNaN):
1426 case convolve(fcZero, fcNaN):
1427 case convolve(fcNormal, fcNaN):
1428 case convolve(fcInfinity, fcNaN):
1430 copySignificand(rhs);
1433 case convolve(fcNormal, fcInfinity):
1434 case convolve(fcInfinity, fcNormal):
1435 case convolve(fcInfinity, fcInfinity):
1436 category = fcInfinity;
1439 case convolve(fcZero, fcNormal):
1440 case convolve(fcNormal, fcZero):
1441 case convolve(fcZero, fcZero):
1445 case convolve(fcZero, fcInfinity):
1446 case convolve(fcInfinity, fcZero):
1450 case convolve(fcNormal, fcNormal):
1456 APFloat::divideSpecials(const APFloat &rhs)
1458 switch (convolve(category, rhs.category)) {
1460 llvm_unreachable(0);
1462 case convolve(fcNaN, fcZero):
1463 case convolve(fcNaN, fcNormal):
1464 case convolve(fcNaN, fcInfinity):
1465 case convolve(fcNaN, fcNaN):
1466 case convolve(fcInfinity, fcZero):
1467 case convolve(fcInfinity, fcNormal):
1468 case convolve(fcZero, fcInfinity):
1469 case convolve(fcZero, fcNormal):
1472 case convolve(fcZero, fcNaN):
1473 case convolve(fcNormal, fcNaN):
1474 case convolve(fcInfinity, fcNaN):
1476 copySignificand(rhs);
1479 case convolve(fcNormal, fcInfinity):
1483 case convolve(fcNormal, fcZero):
1484 category = fcInfinity;
1487 case convolve(fcInfinity, fcInfinity):
1488 case convolve(fcZero, fcZero):
1492 case convolve(fcNormal, fcNormal):
1498 APFloat::modSpecials(const APFloat &rhs)
1500 switch (convolve(category, rhs.category)) {
1502 llvm_unreachable(0);
1504 case convolve(fcNaN, fcZero):
1505 case convolve(fcNaN, fcNormal):
1506 case convolve(fcNaN, fcInfinity):
1507 case convolve(fcNaN, fcNaN):
1508 case convolve(fcZero, fcInfinity):
1509 case convolve(fcZero, fcNormal):
1510 case convolve(fcNormal, fcInfinity):
1513 case convolve(fcZero, fcNaN):
1514 case convolve(fcNormal, fcNaN):
1515 case convolve(fcInfinity, fcNaN):
1517 copySignificand(rhs);
1520 case convolve(fcNormal, fcZero):
1521 case convolve(fcInfinity, fcZero):
1522 case convolve(fcInfinity, fcNormal):
1523 case convolve(fcInfinity, fcInfinity):
1524 case convolve(fcZero, fcZero):
1528 case convolve(fcNormal, fcNormal):
1535 APFloat::changeSign()
1537 /* Look mummy, this one's easy. */
1542 APFloat::clearSign()
1544 /* So is this one. */
1549 APFloat::copySign(const APFloat &rhs)
1555 /* Normalized addition or subtraction. */
1557 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1562 assertArithmeticOK(*semantics);
1564 fs = addOrSubtractSpecials(rhs, subtract);
1566 /* This return code means it was not a simple case. */
1567 if (fs == opDivByZero) {
1568 lostFraction lost_fraction;
1570 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1571 fs = normalize(rounding_mode, lost_fraction);
1573 /* Can only be zero if we lost no fraction. */
1574 assert(category != fcZero || lost_fraction == lfExactlyZero);
1577 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1578 positive zero unless rounding to minus infinity, except that
1579 adding two like-signed zeroes gives that zero. */
1580 if (category == fcZero) {
1581 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1582 sign = (rounding_mode == rmTowardNegative);
1588 /* Normalized addition. */
1590 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1592 return addOrSubtract(rhs, rounding_mode, false);
1595 /* Normalized subtraction. */
1597 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1599 return addOrSubtract(rhs, rounding_mode, true);
1602 /* Normalized multiply. */
1604 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1608 assertArithmeticOK(*semantics);
1610 fs = multiplySpecials(rhs);
1612 if (category == fcNormal) {
1613 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1614 fs = normalize(rounding_mode, lost_fraction);
1615 if (lost_fraction != lfExactlyZero)
1616 fs = (opStatus) (fs | opInexact);
1622 /* Normalized divide. */
1624 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1628 assertArithmeticOK(*semantics);
1630 fs = divideSpecials(rhs);
1632 if (category == fcNormal) {
1633 lostFraction lost_fraction = divideSignificand(rhs);
1634 fs = normalize(rounding_mode, lost_fraction);
1635 if (lost_fraction != lfExactlyZero)
1636 fs = (opStatus) (fs | opInexact);
1642 /* Normalized remainder. This is not currently correct in all cases. */
1644 APFloat::remainder(const APFloat &rhs)
1648 unsigned int origSign = sign;
1650 assertArithmeticOK(*semantics);
1651 fs = V.divide(rhs, rmNearestTiesToEven);
1652 if (fs == opDivByZero)
1655 int parts = partCount();
1656 integerPart *x = new integerPart[parts];
1658 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1659 rmNearestTiesToEven, &ignored);
1660 if (fs==opInvalidOp)
1663 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1664 rmNearestTiesToEven);
1665 assert(fs==opOK); // should always work
1667 fs = V.multiply(rhs, rmNearestTiesToEven);
1668 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1670 fs = subtract(V, rmNearestTiesToEven);
1671 assert(fs==opOK || fs==opInexact); // likewise
1674 sign = origSign; // IEEE754 requires this
1679 /* Normalized llvm frem (C fmod).
1680 This is not currently correct in all cases. */
1682 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1685 assertArithmeticOK(*semantics);
1686 fs = modSpecials(rhs);
1688 if (category == fcNormal && rhs.category == fcNormal) {
1690 unsigned int origSign = sign;
1692 fs = V.divide(rhs, rmNearestTiesToEven);
1693 if (fs == opDivByZero)
1696 int parts = partCount();
1697 integerPart *x = new integerPart[parts];
1699 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1700 rmTowardZero, &ignored);
1701 if (fs==opInvalidOp)
1704 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1705 rmNearestTiesToEven);
1706 assert(fs==opOK); // should always work
1708 fs = V.multiply(rhs, rounding_mode);
1709 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1711 fs = subtract(V, rounding_mode);
1712 assert(fs==opOK || fs==opInexact); // likewise
1715 sign = origSign; // IEEE754 requires this
1721 /* Normalized fused-multiply-add. */
1723 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1724 const APFloat &addend,
1725 roundingMode rounding_mode)
1729 assertArithmeticOK(*semantics);
1731 /* Post-multiplication sign, before addition. */
1732 sign ^= multiplicand.sign;
1734 /* If and only if all arguments are normal do we need to do an
1735 extended-precision calculation. */
1736 if (category == fcNormal &&
1737 multiplicand.category == fcNormal &&
1738 addend.category == fcNormal) {
1739 lostFraction lost_fraction;
1741 lost_fraction = multiplySignificand(multiplicand, &addend);
1742 fs = normalize(rounding_mode, lost_fraction);
1743 if (lost_fraction != lfExactlyZero)
1744 fs = (opStatus) (fs | opInexact);
1746 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1747 positive zero unless rounding to minus infinity, except that
1748 adding two like-signed zeroes gives that zero. */
1749 if (category == fcZero && sign != addend.sign)
1750 sign = (rounding_mode == rmTowardNegative);
1752 fs = multiplySpecials(multiplicand);
1754 /* FS can only be opOK or opInvalidOp. There is no more work
1755 to do in the latter case. The IEEE-754R standard says it is
1756 implementation-defined in this case whether, if ADDEND is a
1757 quiet NaN, we raise invalid op; this implementation does so.
1759 If we need to do the addition we can do so with normal
1762 fs = addOrSubtract(addend, rounding_mode, false);
1768 /* Rounding-mode corrrect round to integral value. */
1769 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1771 assertArithmeticOK(*semantics);
1773 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1774 // precision of our format, and then subtract it back off again. The choice
1775 // of rounding modes for the addition/subtraction determines the rounding mode
1776 // for our integral rounding as well.
1777 // NOTE: When the input value is negative, we do subtraction followed by
1778 // addition instead.
1779 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1780 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1781 APFloat MagicConstant(*semantics);
1782 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1783 rmNearestTiesToEven);
1784 MagicConstant.copySign(*this);
1789 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1790 bool inputSign = isNegative();
1792 fs = add(MagicConstant, rounding_mode);
1793 if (fs != opOK && fs != opInexact)
1796 fs = subtract(MagicConstant, rounding_mode);
1798 // Restore the input sign.
1799 if (inputSign != isNegative())
1806 /* Comparison requires normalized numbers. */
1808 APFloat::compare(const APFloat &rhs) const
1812 assertArithmeticOK(*semantics);
1813 assert(semantics == rhs.semantics);
1815 switch (convolve(category, rhs.category)) {
1817 llvm_unreachable(0);
1819 case convolve(fcNaN, fcZero):
1820 case convolve(fcNaN, fcNormal):
1821 case convolve(fcNaN, fcInfinity):
1822 case convolve(fcNaN, fcNaN):
1823 case convolve(fcZero, fcNaN):
1824 case convolve(fcNormal, fcNaN):
1825 case convolve(fcInfinity, fcNaN):
1826 return cmpUnordered;
1828 case convolve(fcInfinity, fcNormal):
1829 case convolve(fcInfinity, fcZero):
1830 case convolve(fcNormal, fcZero):
1834 return cmpGreaterThan;
1836 case convolve(fcNormal, fcInfinity):
1837 case convolve(fcZero, fcInfinity):
1838 case convolve(fcZero, fcNormal):
1840 return cmpGreaterThan;
1844 case convolve(fcInfinity, fcInfinity):
1845 if (sign == rhs.sign)
1850 return cmpGreaterThan;
1852 case convolve(fcZero, fcZero):
1855 case convolve(fcNormal, fcNormal):
1859 /* Two normal numbers. Do they have the same sign? */
1860 if (sign != rhs.sign) {
1862 result = cmpLessThan;
1864 result = cmpGreaterThan;
1866 /* Compare absolute values; invert result if negative. */
1867 result = compareAbsoluteValue(rhs);
1870 if (result == cmpLessThan)
1871 result = cmpGreaterThan;
1872 else if (result == cmpGreaterThan)
1873 result = cmpLessThan;
1880 /// APFloat::convert - convert a value of one floating point type to another.
1881 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1882 /// records whether the transformation lost information, i.e. whether
1883 /// converting the result back to the original type will produce the
1884 /// original value (this is almost the same as return value==fsOK, but there
1885 /// are edge cases where this is not so).
1888 APFloat::convert(const fltSemantics &toSemantics,
1889 roundingMode rounding_mode, bool *losesInfo)
1891 lostFraction lostFraction;
1892 unsigned int newPartCount, oldPartCount;
1895 const fltSemantics &fromSemantics = *semantics;
1897 assertArithmeticOK(fromSemantics);
1898 assertArithmeticOK(toSemantics);
1899 lostFraction = lfExactlyZero;
1900 newPartCount = partCountForBits(toSemantics.precision + 1);
1901 oldPartCount = partCount();
1902 shift = toSemantics.precision - fromSemantics.precision;
1904 bool X86SpecialNan = false;
1905 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1906 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1907 (!(*significandParts() & 0x8000000000000000ULL) ||
1908 !(*significandParts() & 0x4000000000000000ULL))) {
1909 // x86 has some unusual NaNs which cannot be represented in any other
1910 // format; note them here.
1911 X86SpecialNan = true;
1914 // If this is a truncation, perform the shift before we narrow the storage.
1915 if (shift < 0 && (category==fcNormal || category==fcNaN))
1916 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1918 // Fix the storage so it can hold to new value.
1919 if (newPartCount > oldPartCount) {
1920 // The new type requires more storage; make it available.
1921 integerPart *newParts;
1922 newParts = new integerPart[newPartCount];
1923 APInt::tcSet(newParts, 0, newPartCount);
1924 if (category==fcNormal || category==fcNaN)
1925 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1927 significand.parts = newParts;
1928 } else if (newPartCount == 1 && oldPartCount != 1) {
1929 // Switch to built-in storage for a single part.
1930 integerPart newPart = 0;
1931 if (category==fcNormal || category==fcNaN)
1932 newPart = significandParts()[0];
1934 significand.part = newPart;
1937 // Now that we have the right storage, switch the semantics.
1938 semantics = &toSemantics;
1940 // If this is an extension, perform the shift now that the storage is
1942 if (shift > 0 && (category==fcNormal || category==fcNaN))
1943 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1945 if (category == fcNormal) {
1946 fs = normalize(rounding_mode, lostFraction);
1947 *losesInfo = (fs != opOK);
1948 } else if (category == fcNaN) {
1949 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1950 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1951 // does not give you back the same bits. This is dubious, and we
1952 // don't currently do it. You're really supposed to get
1953 // an invalid operation signal at runtime, but nobody does that.
1963 /* Convert a floating point number to an integer according to the
1964 rounding mode. If the rounded integer value is out of range this
1965 returns an invalid operation exception and the contents of the
1966 destination parts are unspecified. If the rounded value is in
1967 range but the floating point number is not the exact integer, the C
1968 standard doesn't require an inexact exception to be raised. IEEE
1969 854 does require it so we do that.
1971 Note that for conversions to integer type the C standard requires
1972 round-to-zero to always be used. */
1974 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1976 roundingMode rounding_mode,
1977 bool *isExact) const
1979 lostFraction lost_fraction;
1980 const integerPart *src;
1981 unsigned int dstPartsCount, truncatedBits;
1983 assertArithmeticOK(*semantics);
1987 /* Handle the three special cases first. */
1988 if (category == fcInfinity || category == fcNaN)
1991 dstPartsCount = partCountForBits(width);
1993 if (category == fcZero) {
1994 APInt::tcSet(parts, 0, dstPartsCount);
1995 // Negative zero can't be represented as an int.
2000 src = significandParts();
2002 /* Step 1: place our absolute value, with any fraction truncated, in
2005 /* Our absolute value is less than one; truncate everything. */
2006 APInt::tcSet(parts, 0, dstPartsCount);
2007 /* For exponent -1 the integer bit represents .5, look at that.
2008 For smaller exponents leftmost truncated bit is 0. */
2009 truncatedBits = semantics->precision -1U - exponent;
2011 /* We want the most significant (exponent + 1) bits; the rest are
2013 unsigned int bits = exponent + 1U;
2015 /* Hopelessly large in magnitude? */
2019 if (bits < semantics->precision) {
2020 /* We truncate (semantics->precision - bits) bits. */
2021 truncatedBits = semantics->precision - bits;
2022 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2024 /* We want at least as many bits as are available. */
2025 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2026 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2031 /* Step 2: work out any lost fraction, and increment the absolute
2032 value if we would round away from zero. */
2033 if (truncatedBits) {
2034 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2036 if (lost_fraction != lfExactlyZero &&
2037 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2038 if (APInt::tcIncrement(parts, dstPartsCount))
2039 return opInvalidOp; /* Overflow. */
2042 lost_fraction = lfExactlyZero;
2045 /* Step 3: check if we fit in the destination. */
2046 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2050 /* Negative numbers cannot be represented as unsigned. */
2054 /* It takes omsb bits to represent the unsigned integer value.
2055 We lose a bit for the sign, but care is needed as the
2056 maximally negative integer is a special case. */
2057 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2060 /* This case can happen because of rounding. */
2065 APInt::tcNegate (parts, dstPartsCount);
2067 if (omsb >= width + !isSigned)
2071 if (lost_fraction == lfExactlyZero) {
2078 /* Same as convertToSignExtendedInteger, except we provide
2079 deterministic values in case of an invalid operation exception,
2080 namely zero for NaNs and the minimal or maximal value respectively
2081 for underflow or overflow.
2082 The *isExact output tells whether the result is exact, in the sense
2083 that converting it back to the original floating point type produces
2084 the original value. This is almost equivalent to result==opOK,
2085 except for negative zeroes.
2088 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2090 roundingMode rounding_mode, bool *isExact) const
2094 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2097 if (fs == opInvalidOp) {
2098 unsigned int bits, dstPartsCount;
2100 dstPartsCount = partCountForBits(width);
2102 if (category == fcNaN)
2107 bits = width - isSigned;
2109 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2110 if (sign && isSigned)
2111 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2117 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2118 an APSInt, whose initial bit-width and signed-ness are used to determine the
2119 precision of the conversion.
2122 APFloat::convertToInteger(APSInt &result,
2123 roundingMode rounding_mode, bool *isExact) const
2125 unsigned bitWidth = result.getBitWidth();
2126 SmallVector<uint64_t, 4> parts(result.getNumWords());
2127 opStatus status = convertToInteger(
2128 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2129 // Keeps the original signed-ness.
2130 result = APInt(bitWidth, parts);
2134 /* Convert an unsigned integer SRC to a floating point number,
2135 rounding according to ROUNDING_MODE. The sign of the floating
2136 point number is not modified. */
2138 APFloat::convertFromUnsignedParts(const integerPart *src,
2139 unsigned int srcCount,
2140 roundingMode rounding_mode)
2142 unsigned int omsb, precision, dstCount;
2144 lostFraction lost_fraction;
2146 assertArithmeticOK(*semantics);
2147 category = fcNormal;
2148 omsb = APInt::tcMSB(src, srcCount) + 1;
2149 dst = significandParts();
2150 dstCount = partCount();
2151 precision = semantics->precision;
2153 /* We want the most significant PRECISION bits of SRC. There may not
2154 be that many; extract what we can. */
2155 if (precision <= omsb) {
2156 exponent = omsb - 1;
2157 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2159 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2161 exponent = precision - 1;
2162 lost_fraction = lfExactlyZero;
2163 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2166 return normalize(rounding_mode, lost_fraction);
2170 APFloat::convertFromAPInt(const APInt &Val,
2172 roundingMode rounding_mode)
2174 unsigned int partCount = Val.getNumWords();
2178 if (isSigned && api.isNegative()) {
2183 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2186 /* Convert a two's complement integer SRC to a floating point number,
2187 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2188 integer is signed, in which case it must be sign-extended. */
2190 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2191 unsigned int srcCount,
2193 roundingMode rounding_mode)
2197 assertArithmeticOK(*semantics);
2199 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2202 /* If we're signed and negative negate a copy. */
2204 copy = new integerPart[srcCount];
2205 APInt::tcAssign(copy, src, srcCount);
2206 APInt::tcNegate(copy, srcCount);
2207 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2211 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2217 /* FIXME: should this just take a const APInt reference? */
2219 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2220 unsigned int width, bool isSigned,
2221 roundingMode rounding_mode)
2223 unsigned int partCount = partCountForBits(width);
2224 APInt api = APInt(width, makeArrayRef(parts, partCount));
2227 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2232 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2236 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2238 lostFraction lost_fraction = lfExactlyZero;
2239 integerPart *significand;
2240 unsigned int bitPos, partsCount;
2241 StringRef::iterator dot, firstSignificantDigit;
2245 category = fcNormal;
2247 significand = significandParts();
2248 partsCount = partCount();
2249 bitPos = partsCount * integerPartWidth;
2251 /* Skip leading zeroes and any (hexa)decimal point. */
2252 StringRef::iterator begin = s.begin();
2253 StringRef::iterator end = s.end();
2254 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2255 firstSignificantDigit = p;
2258 integerPart hex_value;
2261 assert(dot == end && "String contains multiple dots");
2268 hex_value = hexDigitValue(*p);
2269 if (hex_value == -1U) {
2278 /* Store the number whilst 4-bit nibbles remain. */
2281 hex_value <<= bitPos % integerPartWidth;
2282 significand[bitPos / integerPartWidth] |= hex_value;
2284 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2285 while (p != end && hexDigitValue(*p) != -1U)
2292 /* Hex floats require an exponent but not a hexadecimal point. */
2293 assert(p != end && "Hex strings require an exponent");
2294 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2295 assert(p != begin && "Significand has no digits");
2296 assert((dot == end || p - begin != 1) && "Significand has no digits");
2298 /* Ignore the exponent if we are zero. */
2299 if (p != firstSignificantDigit) {
2302 /* Implicit hexadecimal point? */
2306 /* Calculate the exponent adjustment implicit in the number of
2307 significant digits. */
2308 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2309 if (expAdjustment < 0)
2311 expAdjustment = expAdjustment * 4 - 1;
2313 /* Adjust for writing the significand starting at the most
2314 significant nibble. */
2315 expAdjustment += semantics->precision;
2316 expAdjustment -= partsCount * integerPartWidth;
2318 /* Adjust for the given exponent. */
2319 exponent = totalExponent(p + 1, end, expAdjustment);
2322 return normalize(rounding_mode, lost_fraction);
2326 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2327 unsigned sigPartCount, int exp,
2328 roundingMode rounding_mode)
2330 unsigned int parts, pow5PartCount;
2331 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2332 integerPart pow5Parts[maxPowerOfFiveParts];
2335 isNearest = (rounding_mode == rmNearestTiesToEven ||
2336 rounding_mode == rmNearestTiesToAway);
2338 parts = partCountForBits(semantics->precision + 11);
2340 /* Calculate pow(5, abs(exp)). */
2341 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2343 for (;; parts *= 2) {
2344 opStatus sigStatus, powStatus;
2345 unsigned int excessPrecision, truncatedBits;
2347 calcSemantics.precision = parts * integerPartWidth - 1;
2348 excessPrecision = calcSemantics.precision - semantics->precision;
2349 truncatedBits = excessPrecision;
2351 APFloat decSig(calcSemantics, fcZero, sign);
2352 APFloat pow5(calcSemantics, fcZero, false);
2354 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2355 rmNearestTiesToEven);
2356 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2357 rmNearestTiesToEven);
2358 /* Add exp, as 10^n = 5^n * 2^n. */
2359 decSig.exponent += exp;
2361 lostFraction calcLostFraction;
2362 integerPart HUerr, HUdistance;
2363 unsigned int powHUerr;
2366 /* multiplySignificand leaves the precision-th bit set to 1. */
2367 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2368 powHUerr = powStatus != opOK;
2370 calcLostFraction = decSig.divideSignificand(pow5);
2371 /* Denormal numbers have less precision. */
2372 if (decSig.exponent < semantics->minExponent) {
2373 excessPrecision += (semantics->minExponent - decSig.exponent);
2374 truncatedBits = excessPrecision;
2375 if (excessPrecision > calcSemantics.precision)
2376 excessPrecision = calcSemantics.precision;
2378 /* Extra half-ulp lost in reciprocal of exponent. */
2379 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2382 /* Both multiplySignificand and divideSignificand return the
2383 result with the integer bit set. */
2384 assert(APInt::tcExtractBit
2385 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2387 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2389 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2390 excessPrecision, isNearest);
2392 /* Are we guaranteed to round correctly if we truncate? */
2393 if (HUdistance >= HUerr) {
2394 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2395 calcSemantics.precision - excessPrecision,
2397 /* Take the exponent of decSig. If we tcExtract-ed less bits
2398 above we must adjust our exponent to compensate for the
2399 implicit right shift. */
2400 exponent = (decSig.exponent + semantics->precision
2401 - (calcSemantics.precision - excessPrecision));
2402 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2405 return normalize(rounding_mode, calcLostFraction);
2411 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2416 /* Scan the text. */
2417 StringRef::iterator p = str.begin();
2418 interpretDecimal(p, str.end(), &D);
2420 /* Handle the quick cases. First the case of no significant digits,
2421 i.e. zero, and then exponents that are obviously too large or too
2422 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2423 definitely overflows if
2425 (exp - 1) * L >= maxExponent
2427 and definitely underflows to zero where
2429 (exp + 1) * L <= minExponent - precision
2431 With integer arithmetic the tightest bounds for L are
2433 93/28 < L < 196/59 [ numerator <= 256 ]
2434 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2437 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2441 /* Check whether the normalized exponent is high enough to overflow
2442 max during the log-rebasing in the max-exponent check below. */
2443 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2444 fs = handleOverflow(rounding_mode);
2446 /* If it wasn't, then it also wasn't high enough to overflow max
2447 during the log-rebasing in the min-exponent check. Check that it
2448 won't overflow min in either check, then perform the min-exponent
2450 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2451 (D.normalizedExponent + 1) * 28738 <=
2452 8651 * (semantics->minExponent - (int) semantics->precision)) {
2453 /* Underflow to zero and round. */
2455 fs = normalize(rounding_mode, lfLessThanHalf);
2457 /* We can finally safely perform the max-exponent check. */
2458 } else if ((D.normalizedExponent - 1) * 42039
2459 >= 12655 * semantics->maxExponent) {
2460 /* Overflow and round. */
2461 fs = handleOverflow(rounding_mode);
2463 integerPart *decSignificand;
2464 unsigned int partCount;
2466 /* A tight upper bound on number of bits required to hold an
2467 N-digit decimal integer is N * 196 / 59. Allocate enough space
2468 to hold the full significand, and an extra part required by
2470 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2471 partCount = partCountForBits(1 + 196 * partCount / 59);
2472 decSignificand = new integerPart[partCount + 1];
2475 /* Convert to binary efficiently - we do almost all multiplication
2476 in an integerPart. When this would overflow do we do a single
2477 bignum multiplication, and then revert again to multiplication
2478 in an integerPart. */
2480 integerPart decValue, val, multiplier;
2488 if (p == str.end()) {
2492 decValue = decDigitValue(*p++);
2493 assert(decValue < 10U && "Invalid character in significand");
2495 val = val * 10 + decValue;
2496 /* The maximum number that can be multiplied by ten with any
2497 digit added without overflowing an integerPart. */
2498 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2500 /* Multiply out the current part. */
2501 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2502 partCount, partCount + 1, false);
2504 /* If we used another part (likely but not guaranteed), increase
2506 if (decSignificand[partCount])
2508 } while (p <= D.lastSigDigit);
2510 category = fcNormal;
2511 fs = roundSignificandWithExponent(decSignificand, partCount,
2512 D.exponent, rounding_mode);
2514 delete [] decSignificand;
2521 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2523 assertArithmeticOK(*semantics);
2524 assert(!str.empty() && "Invalid string length");
2526 /* Handle a leading minus sign. */
2527 StringRef::iterator p = str.begin();
2528 size_t slen = str.size();
2529 sign = *p == '-' ? 1 : 0;
2530 if (*p == '-' || *p == '+') {
2533 assert(slen && "String has no digits");
2536 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2537 assert(slen - 2 && "Invalid string");
2538 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2542 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2545 /* Write out a hexadecimal representation of the floating point value
2546 to DST, which must be of sufficient size, in the C99 form
2547 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2548 excluding the terminating NUL.
2550 If UPPERCASE, the output is in upper case, otherwise in lower case.
2552 HEXDIGITS digits appear altogether, rounding the value if
2553 necessary. If HEXDIGITS is 0, the minimal precision to display the
2554 number precisely is used instead. If nothing would appear after
2555 the decimal point it is suppressed.
2557 The decimal exponent is always printed and has at least one digit.
2558 Zero values display an exponent of zero. Infinities and NaNs
2559 appear as "infinity" or "nan" respectively.
2561 The above rules are as specified by C99. There is ambiguity about
2562 what the leading hexadecimal digit should be. This implementation
2563 uses whatever is necessary so that the exponent is displayed as
2564 stored. This implies the exponent will fall within the IEEE format
2565 range, and the leading hexadecimal digit will be 0 (for denormals),
2566 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2567 any other digits zero).
2570 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2571 bool upperCase, roundingMode rounding_mode) const
2575 assertArithmeticOK(*semantics);
2583 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2584 dst += sizeof infinityL - 1;
2588 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2589 dst += sizeof NaNU - 1;
2594 *dst++ = upperCase ? 'X': 'x';
2596 if (hexDigits > 1) {
2598 memset (dst, '0', hexDigits - 1);
2599 dst += hexDigits - 1;
2601 *dst++ = upperCase ? 'P': 'p';
2606 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2612 return static_cast<unsigned int>(dst - p);
2615 /* Does the hard work of outputting the correctly rounded hexadecimal
2616 form of a normal floating point number with the specified number of
2617 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2618 digits necessary to print the value precisely is output. */
2620 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2622 roundingMode rounding_mode) const
2624 unsigned int count, valueBits, shift, partsCount, outputDigits;
2625 const char *hexDigitChars;
2626 const integerPart *significand;
2631 *dst++ = upperCase ? 'X': 'x';
2634 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2636 significand = significandParts();
2637 partsCount = partCount();
2639 /* +3 because the first digit only uses the single integer bit, so
2640 we have 3 virtual zero most-significant-bits. */
2641 valueBits = semantics->precision + 3;
2642 shift = integerPartWidth - valueBits % integerPartWidth;
2644 /* The natural number of digits required ignoring trailing
2645 insignificant zeroes. */
2646 outputDigits = (valueBits - significandLSB () + 3) / 4;
2648 /* hexDigits of zero means use the required number for the
2649 precision. Otherwise, see if we are truncating. If we are,
2650 find out if we need to round away from zero. */
2652 if (hexDigits < outputDigits) {
2653 /* We are dropping non-zero bits, so need to check how to round.
2654 "bits" is the number of dropped bits. */
2656 lostFraction fraction;
2658 bits = valueBits - hexDigits * 4;
2659 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2660 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2662 outputDigits = hexDigits;
2665 /* Write the digits consecutively, and start writing in the location
2666 of the hexadecimal point. We move the most significant digit
2667 left and add the hexadecimal point later. */
2670 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2672 while (outputDigits && count) {
2675 /* Put the most significant integerPartWidth bits in "part". */
2676 if (--count == partsCount)
2677 part = 0; /* An imaginary higher zero part. */
2679 part = significand[count] << shift;
2682 part |= significand[count - 1] >> (integerPartWidth - shift);
2684 /* Convert as much of "part" to hexdigits as we can. */
2685 unsigned int curDigits = integerPartWidth / 4;
2687 if (curDigits > outputDigits)
2688 curDigits = outputDigits;
2689 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2690 outputDigits -= curDigits;
2696 /* Note that hexDigitChars has a trailing '0'. */
2699 *q = hexDigitChars[hexDigitValue (*q) + 1];
2700 } while (*q == '0');
2703 /* Add trailing zeroes. */
2704 memset (dst, '0', outputDigits);
2705 dst += outputDigits;
2708 /* Move the most significant digit to before the point, and if there
2709 is something after the decimal point add it. This must come
2710 after rounding above. */
2717 /* Finally output the exponent. */
2718 *dst++ = upperCase ? 'P': 'p';
2720 return writeSignedDecimal (dst, exponent);
2723 hash_code llvm::hash_value(const APFloat &Arg) {
2724 if (Arg.category != APFloat::fcNormal)
2725 return hash_combine((uint8_t)Arg.category,
2726 // NaN has no sign, fix it at zero.
2727 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2728 Arg.semantics->precision);
2730 // Normal floats need their exponent and significand hashed.
2731 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2732 Arg.semantics->precision, Arg.exponent,
2734 Arg.significandParts(),
2735 Arg.significandParts() + Arg.partCount()));
2738 // Conversion from APFloat to/from host float/double. It may eventually be
2739 // possible to eliminate these and have everybody deal with APFloats, but that
2740 // will take a while. This approach will not easily extend to long double.
2741 // Current implementation requires integerPartWidth==64, which is correct at
2742 // the moment but could be made more general.
2744 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2745 // the actual IEEE respresentations. We compensate for that here.
2748 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2750 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2751 assert(partCount()==2);
2753 uint64_t myexponent, mysignificand;
2755 if (category==fcNormal) {
2756 myexponent = exponent+16383; //bias
2757 mysignificand = significandParts()[0];
2758 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2759 myexponent = 0; // denormal
2760 } else if (category==fcZero) {
2763 } else if (category==fcInfinity) {
2764 myexponent = 0x7fff;
2765 mysignificand = 0x8000000000000000ULL;
2767 assert(category == fcNaN && "Unknown category");
2768 myexponent = 0x7fff;
2769 mysignificand = significandParts()[0];
2773 words[0] = mysignificand;
2774 words[1] = ((uint64_t)(sign & 1) << 15) |
2775 (myexponent & 0x7fffLL);
2776 return APInt(80, words);
2780 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2782 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2783 assert(partCount()==2);
2785 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2787 if (category==fcNormal) {
2788 myexponent = exponent + 1023; //bias
2789 myexponent2 = exponent2 + 1023;
2790 mysignificand = significandParts()[0];
2791 mysignificand2 = significandParts()[1];
2792 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2793 myexponent = 0; // denormal
2794 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2795 myexponent2 = 0; // denormal
2796 } else if (category==fcZero) {
2801 } else if (category==fcInfinity) {
2807 assert(category == fcNaN && "Unknown category");
2809 mysignificand = significandParts()[0];
2810 myexponent2 = exponent2;
2811 mysignificand2 = significandParts()[1];
2815 words[0] = ((uint64_t)(sign & 1) << 63) |
2816 ((myexponent & 0x7ff) << 52) |
2817 (mysignificand & 0xfffffffffffffLL);
2818 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2819 ((myexponent2 & 0x7ff) << 52) |
2820 (mysignificand2 & 0xfffffffffffffLL);
2821 return APInt(128, words);
2825 APFloat::convertQuadrupleAPFloatToAPInt() const
2827 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2828 assert(partCount()==2);
2830 uint64_t myexponent, mysignificand, mysignificand2;
2832 if (category==fcNormal) {
2833 myexponent = exponent+16383; //bias
2834 mysignificand = significandParts()[0];
2835 mysignificand2 = significandParts()[1];
2836 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2837 myexponent = 0; // denormal
2838 } else if (category==fcZero) {
2840 mysignificand = mysignificand2 = 0;
2841 } else if (category==fcInfinity) {
2842 myexponent = 0x7fff;
2843 mysignificand = mysignificand2 = 0;
2845 assert(category == fcNaN && "Unknown category!");
2846 myexponent = 0x7fff;
2847 mysignificand = significandParts()[0];
2848 mysignificand2 = significandParts()[1];
2852 words[0] = mysignificand;
2853 words[1] = ((uint64_t)(sign & 1) << 63) |
2854 ((myexponent & 0x7fff) << 48) |
2855 (mysignificand2 & 0xffffffffffffLL);
2857 return APInt(128, words);
2861 APFloat::convertDoubleAPFloatToAPInt() const
2863 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2864 assert(partCount()==1);
2866 uint64_t myexponent, mysignificand;
2868 if (category==fcNormal) {
2869 myexponent = exponent+1023; //bias
2870 mysignificand = *significandParts();
2871 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2872 myexponent = 0; // denormal
2873 } else if (category==fcZero) {
2876 } else if (category==fcInfinity) {
2880 assert(category == fcNaN && "Unknown category!");
2882 mysignificand = *significandParts();
2885 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2886 ((myexponent & 0x7ff) << 52) |
2887 (mysignificand & 0xfffffffffffffLL))));
2891 APFloat::convertFloatAPFloatToAPInt() const
2893 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2894 assert(partCount()==1);
2896 uint32_t myexponent, mysignificand;
2898 if (category==fcNormal) {
2899 myexponent = exponent+127; //bias
2900 mysignificand = (uint32_t)*significandParts();
2901 if (myexponent == 1 && !(mysignificand & 0x800000))
2902 myexponent = 0; // denormal
2903 } else if (category==fcZero) {
2906 } else if (category==fcInfinity) {
2910 assert(category == fcNaN && "Unknown category!");
2912 mysignificand = (uint32_t)*significandParts();
2915 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2916 (mysignificand & 0x7fffff)));
2920 APFloat::convertHalfAPFloatToAPInt() const
2922 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2923 assert(partCount()==1);
2925 uint32_t myexponent, mysignificand;
2927 if (category==fcNormal) {
2928 myexponent = exponent+15; //bias
2929 mysignificand = (uint32_t)*significandParts();
2930 if (myexponent == 1 && !(mysignificand & 0x400))
2931 myexponent = 0; // denormal
2932 } else if (category==fcZero) {
2935 } else if (category==fcInfinity) {
2939 assert(category == fcNaN && "Unknown category!");
2941 mysignificand = (uint32_t)*significandParts();
2944 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2945 (mysignificand & 0x3ff)));
2948 // This function creates an APInt that is just a bit map of the floating
2949 // point constant as it would appear in memory. It is not a conversion,
2950 // and treating the result as a normal integer is unlikely to be useful.
2953 APFloat::bitcastToAPInt() const
2955 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2956 return convertHalfAPFloatToAPInt();
2958 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2959 return convertFloatAPFloatToAPInt();
2961 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2962 return convertDoubleAPFloatToAPInt();
2964 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2965 return convertQuadrupleAPFloatToAPInt();
2967 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2968 return convertPPCDoubleDoubleAPFloatToAPInt();
2970 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2972 return convertF80LongDoubleAPFloatToAPInt();
2976 APFloat::convertToFloat() const
2978 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2979 "Float semantics are not IEEEsingle");
2980 APInt api = bitcastToAPInt();
2981 return api.bitsToFloat();
2985 APFloat::convertToDouble() const
2987 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2988 "Float semantics are not IEEEdouble");
2989 APInt api = bitcastToAPInt();
2990 return api.bitsToDouble();
2993 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2994 /// does not support these bit patterns:
2995 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2996 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2997 /// exponent = 0, integer bit 1 ("pseudodenormal")
2998 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2999 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3001 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3003 assert(api.getBitWidth()==80);
3004 uint64_t i1 = api.getRawData()[0];
3005 uint64_t i2 = api.getRawData()[1];
3006 uint64_t myexponent = (i2 & 0x7fff);
3007 uint64_t mysignificand = i1;
3009 initialize(&APFloat::x87DoubleExtended);
3010 assert(partCount()==2);
3012 sign = static_cast<unsigned int>(i2>>15);
3013 if (myexponent==0 && mysignificand==0) {
3014 // exponent, significand meaningless
3016 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3017 // exponent, significand meaningless
3018 category = fcInfinity;
3019 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3020 // exponent meaningless
3022 significandParts()[0] = mysignificand;
3023 significandParts()[1] = 0;
3025 category = fcNormal;
3026 exponent = myexponent - 16383;
3027 significandParts()[0] = mysignificand;
3028 significandParts()[1] = 0;
3029 if (myexponent==0) // denormal
3035 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3037 assert(api.getBitWidth()==128);
3038 uint64_t i1 = api.getRawData()[0];
3039 uint64_t i2 = api.getRawData()[1];
3040 uint64_t myexponent = (i1 >> 52) & 0x7ff;
3041 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
3042 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
3043 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
3045 initialize(&APFloat::PPCDoubleDouble);
3046 assert(partCount()==2);
3048 sign = static_cast<unsigned int>(i1>>63);
3049 sign2 = static_cast<unsigned int>(i2>>63);
3050 if (myexponent==0 && mysignificand==0) {
3051 // exponent, significand meaningless
3052 // exponent2 and significand2 are required to be 0; we don't check
3054 } else if (myexponent==0x7ff && mysignificand==0) {
3055 // exponent, significand meaningless
3056 // exponent2 and significand2 are required to be 0; we don't check
3057 category = fcInfinity;
3058 } else if (myexponent==0x7ff && mysignificand!=0) {
3059 // exponent meaningless. So is the whole second word, but keep it
3062 exponent2 = myexponent2;
3063 significandParts()[0] = mysignificand;
3064 significandParts()[1] = mysignificand2;
3066 category = fcNormal;
3067 // Note there is no category2; the second word is treated as if it is
3068 // fcNormal, although it might be something else considered by itself.
3069 exponent = myexponent - 1023;
3070 exponent2 = myexponent2 - 1023;
3071 significandParts()[0] = mysignificand;
3072 significandParts()[1] = mysignificand2;
3073 if (myexponent==0) // denormal
3076 significandParts()[0] |= 0x10000000000000LL; // integer bit
3080 significandParts()[1] |= 0x10000000000000LL; // integer bit
3085 APFloat::initFromQuadrupleAPInt(const APInt &api)
3087 assert(api.getBitWidth()==128);
3088 uint64_t i1 = api.getRawData()[0];
3089 uint64_t i2 = api.getRawData()[1];
3090 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3091 uint64_t mysignificand = i1;
3092 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3094 initialize(&APFloat::IEEEquad);
3095 assert(partCount()==2);
3097 sign = static_cast<unsigned int>(i2>>63);
3098 if (myexponent==0 &&
3099 (mysignificand==0 && mysignificand2==0)) {
3100 // exponent, significand meaningless
3102 } else if (myexponent==0x7fff &&
3103 (mysignificand==0 && mysignificand2==0)) {
3104 // exponent, significand meaningless
3105 category = fcInfinity;
3106 } else if (myexponent==0x7fff &&
3107 (mysignificand!=0 || mysignificand2 !=0)) {
3108 // exponent meaningless
3110 significandParts()[0] = mysignificand;
3111 significandParts()[1] = mysignificand2;
3113 category = fcNormal;
3114 exponent = myexponent - 16383;
3115 significandParts()[0] = mysignificand;
3116 significandParts()[1] = mysignificand2;
3117 if (myexponent==0) // denormal
3120 significandParts()[1] |= 0x1000000000000LL; // integer bit
3125 APFloat::initFromDoubleAPInt(const APInt &api)
3127 assert(api.getBitWidth()==64);
3128 uint64_t i = *api.getRawData();
3129 uint64_t myexponent = (i >> 52) & 0x7ff;
3130 uint64_t mysignificand = i & 0xfffffffffffffLL;
3132 initialize(&APFloat::IEEEdouble);
3133 assert(partCount()==1);
3135 sign = static_cast<unsigned int>(i>>63);
3136 if (myexponent==0 && mysignificand==0) {
3137 // exponent, significand meaningless
3139 } else if (myexponent==0x7ff && mysignificand==0) {
3140 // exponent, significand meaningless
3141 category = fcInfinity;
3142 } else if (myexponent==0x7ff && mysignificand!=0) {
3143 // exponent meaningless
3145 *significandParts() = mysignificand;
3147 category = fcNormal;
3148 exponent = myexponent - 1023;
3149 *significandParts() = mysignificand;
3150 if (myexponent==0) // denormal
3153 *significandParts() |= 0x10000000000000LL; // integer bit
3158 APFloat::initFromFloatAPInt(const APInt & api)
3160 assert(api.getBitWidth()==32);
3161 uint32_t i = (uint32_t)*api.getRawData();
3162 uint32_t myexponent = (i >> 23) & 0xff;
3163 uint32_t mysignificand = i & 0x7fffff;
3165 initialize(&APFloat::IEEEsingle);
3166 assert(partCount()==1);
3169 if (myexponent==0 && mysignificand==0) {
3170 // exponent, significand meaningless
3172 } else if (myexponent==0xff && mysignificand==0) {
3173 // exponent, significand meaningless
3174 category = fcInfinity;
3175 } else if (myexponent==0xff && mysignificand!=0) {
3176 // sign, exponent, significand meaningless
3178 *significandParts() = mysignificand;
3180 category = fcNormal;
3181 exponent = myexponent - 127; //bias
3182 *significandParts() = mysignificand;
3183 if (myexponent==0) // denormal
3186 *significandParts() |= 0x800000; // integer bit
3191 APFloat::initFromHalfAPInt(const APInt & api)
3193 assert(api.getBitWidth()==16);
3194 uint32_t i = (uint32_t)*api.getRawData();
3195 uint32_t myexponent = (i >> 10) & 0x1f;
3196 uint32_t mysignificand = i & 0x3ff;
3198 initialize(&APFloat::IEEEhalf);
3199 assert(partCount()==1);
3202 if (myexponent==0 && mysignificand==0) {
3203 // exponent, significand meaningless
3205 } else if (myexponent==0x1f && mysignificand==0) {
3206 // exponent, significand meaningless
3207 category = fcInfinity;
3208 } else if (myexponent==0x1f && mysignificand!=0) {
3209 // sign, exponent, significand meaningless
3211 *significandParts() = mysignificand;
3213 category = fcNormal;
3214 exponent = myexponent - 15; //bias
3215 *significandParts() = mysignificand;
3216 if (myexponent==0) // denormal
3219 *significandParts() |= 0x400; // integer bit
3223 /// Treat api as containing the bits of a floating point number. Currently
3224 /// we infer the floating point type from the size of the APInt. The
3225 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3226 /// when the size is anything else).
3228 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3230 if (api.getBitWidth() == 16)
3231 return initFromHalfAPInt(api);
3232 else if (api.getBitWidth() == 32)
3233 return initFromFloatAPInt(api);
3234 else if (api.getBitWidth()==64)
3235 return initFromDoubleAPInt(api);
3236 else if (api.getBitWidth()==80)
3237 return initFromF80LongDoubleAPInt(api);
3238 else if (api.getBitWidth()==128)
3240 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3242 llvm_unreachable(0);
3246 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3248 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
3251 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3252 APFloat Val(Sem, fcNormal, Negative);
3254 // We want (in interchange format):
3255 // sign = {Negative}
3257 // significand = 1..1
3259 Val.exponent = Sem.maxExponent; // unbiased
3261 // 1-initialize all bits....
3262 Val.zeroSignificand();
3263 integerPart *significand = Val.significandParts();
3264 unsigned N = partCountForBits(Sem.precision);
3265 for (unsigned i = 0; i != N; ++i)
3266 significand[i] = ~((integerPart) 0);
3268 // ...and then clear the top bits for internal consistency.
3269 if (Sem.precision % integerPartWidth != 0)
3271 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3276 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3277 APFloat Val(Sem, fcNormal, Negative);
3279 // We want (in interchange format):
3280 // sign = {Negative}
3282 // significand = 0..01
3284 Val.exponent = Sem.minExponent; // unbiased
3285 Val.zeroSignificand();
3286 Val.significandParts()[0] = 1;
3290 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3291 APFloat Val(Sem, fcNormal, Negative);
3293 // We want (in interchange format):
3294 // sign = {Negative}
3296 // significand = 10..0
3298 Val.exponent = Sem.minExponent;
3299 Val.zeroSignificand();
3300 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3301 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3306 APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
3307 initFromAPInt(api, isIEEE);
3310 APFloat::APFloat(float f) : exponent2(0), sign2(0) {
3311 initFromAPInt(APInt::floatToBits(f));
3314 APFloat::APFloat(double d) : exponent2(0), sign2(0) {
3315 initFromAPInt(APInt::doubleToBits(d));
3319 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3320 Buffer.append(Str.begin(), Str.end());
3323 /// Removes data from the given significand until it is no more
3324 /// precise than is required for the desired precision.
3325 void AdjustToPrecision(APInt &significand,
3326 int &exp, unsigned FormatPrecision) {
3327 unsigned bits = significand.getActiveBits();
3329 // 196/59 is a very slight overestimate of lg_2(10).
3330 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3332 if (bits <= bitsRequired) return;
3334 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3335 if (!tensRemovable) return;
3337 exp += tensRemovable;
3339 APInt divisor(significand.getBitWidth(), 1);
3340 APInt powten(significand.getBitWidth(), 10);
3342 if (tensRemovable & 1)
3344 tensRemovable >>= 1;
3345 if (!tensRemovable) break;
3349 significand = significand.udiv(divisor);
3351 // Truncate the significand down to its active bit count, but
3352 // don't try to drop below 32.
3353 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3354 significand = significand.trunc(newPrecision);
3358 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3359 int &exp, unsigned FormatPrecision) {
3360 unsigned N = buffer.size();
3361 if (N <= FormatPrecision) return;
3363 // The most significant figures are the last ones in the buffer.
3364 unsigned FirstSignificant = N - FormatPrecision;
3367 // FIXME: this probably shouldn't use 'round half up'.
3369 // Rounding down is just a truncation, except we also want to drop
3370 // trailing zeros from the new result.
3371 if (buffer[FirstSignificant - 1] < '5') {
3372 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3375 exp += FirstSignificant;
3376 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3380 // Rounding up requires a decimal add-with-carry. If we continue
3381 // the carry, the newly-introduced zeros will just be truncated.
3382 for (unsigned I = FirstSignificant; I != N; ++I) {
3383 if (buffer[I] == '9') {
3391 // If we carried through, we have exactly one digit of precision.
3392 if (FirstSignificant == N) {
3393 exp += FirstSignificant;
3395 buffer.push_back('1');
3399 exp += FirstSignificant;
3400 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3404 void APFloat::toString(SmallVectorImpl<char> &Str,
3405 unsigned FormatPrecision,
3406 unsigned FormatMaxPadding) const {
3410 return append(Str, "-Inf");
3412 return append(Str, "+Inf");
3414 case fcNaN: return append(Str, "NaN");
3420 if (!FormatMaxPadding)
3421 append(Str, "0.0E+0");
3433 // Decompose the number into an APInt and an exponent.
3434 int exp = exponent - ((int) semantics->precision - 1);
3435 APInt significand(semantics->precision,
3436 makeArrayRef(significandParts(),
3437 partCountForBits(semantics->precision)));
3439 // Set FormatPrecision if zero. We want to do this before we
3440 // truncate trailing zeros, as those are part of the precision.
3441 if (!FormatPrecision) {
3442 // It's an interesting question whether to use the nominal
3443 // precision or the active precision here for denormals.
3445 // FormatPrecision = ceil(significandBits / lg_2(10))
3446 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3449 // Ignore trailing binary zeros.
3450 int trailingZeros = significand.countTrailingZeros();
3451 exp += trailingZeros;
3452 significand = significand.lshr(trailingZeros);
3454 // Change the exponent from 2^e to 10^e.
3457 } else if (exp > 0) {
3459 significand = significand.zext(semantics->precision + exp);
3460 significand <<= exp;
3462 } else { /* exp < 0 */
3465 // We transform this using the identity:
3466 // (N)(2^-e) == (N)(5^e)(10^-e)
3467 // This means we have to multiply N (the significand) by 5^e.
3468 // To avoid overflow, we have to operate on numbers large
3469 // enough to store N * 5^e:
3470 // log2(N * 5^e) == log2(N) + e * log2(5)
3471 // <= semantics->precision + e * 137 / 59
3472 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3474 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3476 // Multiply significand by 5^e.
3477 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3478 significand = significand.zext(precision);
3479 APInt five_to_the_i(precision, 5);
3481 if (texp & 1) significand *= five_to_the_i;
3485 five_to_the_i *= five_to_the_i;
3489 AdjustToPrecision(significand, exp, FormatPrecision);
3491 llvm::SmallVector<char, 256> buffer;
3494 unsigned precision = significand.getBitWidth();
3495 APInt ten(precision, 10);
3496 APInt digit(precision, 0);
3498 bool inTrail = true;
3499 while (significand != 0) {
3500 // digit <- significand % 10
3501 // significand <- significand / 10
3502 APInt::udivrem(significand, ten, significand, digit);
3504 unsigned d = digit.getZExtValue();
3506 // Drop trailing zeros.
3507 if (inTrail && !d) exp++;
3509 buffer.push_back((char) ('0' + d));
3514 assert(!buffer.empty() && "no characters in buffer!");
3516 // Drop down to FormatPrecision.
3517 // TODO: don't do more precise calculations above than are required.
3518 AdjustToPrecision(buffer, exp, FormatPrecision);
3520 unsigned NDigits = buffer.size();
3522 // Check whether we should use scientific notation.
3523 bool FormatScientific;
3524 if (!FormatMaxPadding)
3525 FormatScientific = true;
3530 // But we shouldn't make the number look more precise than it is.
3531 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3532 NDigits + (unsigned) exp > FormatPrecision);
3534 // Power of the most significant digit.
3535 int MSD = exp + (int) (NDigits - 1);
3538 FormatScientific = false;
3540 // 765e-5 == 0.00765
3542 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3547 // Scientific formatting is pretty straightforward.
3548 if (FormatScientific) {
3549 exp += (NDigits - 1);
3551 Str.push_back(buffer[NDigits-1]);
3556 for (unsigned I = 1; I != NDigits; ++I)
3557 Str.push_back(buffer[NDigits-1-I]);
3560 Str.push_back(exp >= 0 ? '+' : '-');
3561 if (exp < 0) exp = -exp;
3562 SmallVector<char, 6> expbuf;
3564 expbuf.push_back((char) ('0' + (exp % 10)));
3567 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3568 Str.push_back(expbuf[E-1-I]);
3572 // Non-scientific, positive exponents.
3574 for (unsigned I = 0; I != NDigits; ++I)
3575 Str.push_back(buffer[NDigits-1-I]);
3576 for (unsigned I = 0; I != (unsigned) exp; ++I)
3581 // Non-scientific, negative exponents.
3583 // The number of digits to the left of the decimal point.
3584 int NWholeDigits = exp + (int) NDigits;
3587 if (NWholeDigits > 0) {
3588 for (; I != (unsigned) NWholeDigits; ++I)
3589 Str.push_back(buffer[NDigits-I-1]);
3592 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3596 for (unsigned Z = 1; Z != NZeros; ++Z)
3600 for (; I != NDigits; ++I)
3601 Str.push_back(buffer[NDigits-I-1]);
3604 bool APFloat::getExactInverse(APFloat *inv) const {
3605 // We can only guarantee the existence of an exact inverse for IEEE floats.
3606 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3607 semantics != &IEEEdouble && semantics != &IEEEquad)
3610 // Special floats and denormals have no exact inverse.
3611 if (category != fcNormal)
3614 // Check that the number is a power of two by making sure that only the
3615 // integer bit is set in the significand.
3616 if (significandLSB() != semantics->precision - 1)
3620 APFloat reciprocal(*semantics, 1ULL);
3621 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3624 // Avoid multiplication with a denormal, it is not safe on all platforms and
3625 // may be slower than a normal division.
3626 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3629 assert(reciprocal.category == fcNormal &&
3630 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);