1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MathExtras.h"
27 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29 /* Assumed in hexadecimal significand parsing, and conversion to
30 hexadecimal strings. */
31 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
32 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
36 /* Represents floating point arithmetic semantics. */
38 /* The largest E such that 2^E is representable; this matches the
39 definition of IEEE 754. */
40 exponent_t maxExponent;
42 /* The smallest E such that 2^E is a normalized number; this
43 matches the definition of IEEE 754. */
44 exponent_t minExponent;
46 /* Number of bits in the significand. This includes the integer
48 unsigned int precision;
50 /* True if arithmetic is supported. */
51 unsigned int arithmeticOK;
54 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
55 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
56 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
57 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
58 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
59 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
61 // The PowerPC format consists of two doubles. It does not map cleanly
62 // onto the usual format above. For now only storage of constants of
63 // this type is supported, no arithmetic.
64 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
66 /* A tight upper bound on number of parts required to hold the value
69 power * 815 / (351 * integerPartWidth) + 1
71 However, whilst the result may require only this many parts,
72 because we are multiplying two values to get it, the
73 multiplication may require an extra part with the excess part
74 being zero (consider the trivial case of 1 * 1, tcFullMultiply
75 requires two parts to hold the single-part result). So we add an
76 extra one to guarantee enough space whilst multiplying. */
77 const unsigned int maxExponent = 16383;
78 const unsigned int maxPrecision = 113;
79 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
80 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
81 / (351 * integerPartWidth));
84 /* A bunch of private, handy routines. */
86 static inline unsigned int
87 partCountForBits(unsigned int bits)
89 return ((bits) + integerPartWidth - 1) / integerPartWidth;
92 /* Returns 0U-9U. Return values >= 10U are not digits. */
93 static inline unsigned int
94 decDigitValue(unsigned int c)
100 hexDigitValue(unsigned int c)
120 assertArithmeticOK(const llvm::fltSemantics &semantics) {
121 assert(semantics.arithmeticOK &&
122 "Compile-time arithmetic does not support these semantics");
125 /* Return the value of a decimal exponent of the form
128 If the exponent overflows, returns a large exponent with the
131 readExponent(StringRef::iterator begin, StringRef::iterator end)
134 unsigned int absExponent;
135 const unsigned int overlargeExponent = 24000; /* FIXME. */
136 StringRef::iterator p = begin;
138 assert(p != end && "Exponent has no digits");
140 isNegative = (*p == '-');
141 if (*p == '-' || *p == '+') {
143 assert(p != end && "Exponent has no digits");
146 absExponent = decDigitValue(*p++);
147 assert(absExponent < 10U && "Invalid character in exponent");
149 for (; p != end; ++p) {
152 value = decDigitValue(*p);
153 assert(value < 10U && "Invalid character in exponent");
155 value += absExponent * 10;
156 if (absExponent >= overlargeExponent) {
157 absExponent = overlargeExponent;
158 p = end; /* outwit assert below */
164 assert(p == end && "Invalid exponent in exponent");
167 return -(int) absExponent;
169 return (int) absExponent;
172 /* This is ugly and needs cleaning up, but I don't immediately see
173 how whilst remaining safe. */
175 totalExponent(StringRef::iterator p, StringRef::iterator end,
176 int exponentAdjustment)
178 int unsignedExponent;
179 bool negative, overflow;
182 assert(p != end && "Exponent has no digits");
184 negative = *p == '-';
185 if (*p == '-' || *p == '+') {
187 assert(p != end && "Exponent has no digits");
190 unsignedExponent = 0;
192 for (; p != end; ++p) {
195 value = decDigitValue(*p);
196 assert(value < 10U && "Invalid character in exponent");
198 unsignedExponent = unsignedExponent * 10 + value;
199 if (unsignedExponent > 32767)
203 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
207 exponent = unsignedExponent;
209 exponent = -exponent;
210 exponent += exponentAdjustment;
211 if (exponent > 32767 || exponent < -32768)
216 exponent = negative ? -32768: 32767;
221 static StringRef::iterator
222 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
223 StringRef::iterator *dot)
225 StringRef::iterator p = begin;
227 while (*p == '0' && p != end)
233 assert(end - begin != 1 && "Significand has no digits");
235 while (*p == '0' && p != end)
242 /* Given a normal decimal floating point number of the form
246 where the decimal point and exponent are optional, fill out the
247 structure D. Exponent is appropriate if the significand is
248 treated as an integer, and normalizedExponent if the significand
249 is taken to have the decimal point after a single leading
252 If the value is zero, V->firstSigDigit points to a non-digit, and
253 the return exponent is zero.
256 const char *firstSigDigit;
257 const char *lastSigDigit;
259 int normalizedExponent;
263 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
266 StringRef::iterator dot = end;
267 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
269 D->firstSigDigit = p;
271 D->normalizedExponent = 0;
273 for (; p != end; ++p) {
275 assert(dot == end && "String contains multiple dots");
280 if (decDigitValue(*p) >= 10U)
285 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
286 assert(p != begin && "Significand has no digits");
287 assert((dot == end || p - begin != 1) && "Significand has no digits");
289 /* p points to the first non-digit in the string */
290 D->exponent = readExponent(p + 1, end);
292 /* Implied decimal point? */
297 /* If number is all zeroes accept any exponent. */
298 if (p != D->firstSigDigit) {
299 /* Drop insignificant trailing zeroes. */
304 while (p != begin && *p == '0');
305 while (p != begin && *p == '.');
308 /* Adjust the exponents for any decimal point. */
309 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
310 D->normalizedExponent = (D->exponent +
311 static_cast<exponent_t>((p - D->firstSigDigit)
312 - (dot > D->firstSigDigit && dot < p)));
318 /* Return the trailing fraction of a hexadecimal number.
319 DIGITVALUE is the first hex digit of the fraction, P points to
322 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
323 unsigned int digitValue)
325 unsigned int hexDigit;
327 /* If the first trailing digit isn't 0 or 8 we can work out the
328 fraction immediately. */
330 return lfMoreThanHalf;
331 else if (digitValue < 8 && digitValue > 0)
332 return lfLessThanHalf;
334 /* Otherwise we need to find the first non-zero digit. */
338 assert(p != end && "Invalid trailing hexadecimal fraction!");
340 hexDigit = hexDigitValue(*p);
342 /* If we ran off the end it is exactly zero or one-half, otherwise
345 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
347 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
350 /* Return the fraction lost were a bignum truncated losing the least
351 significant BITS bits. */
353 lostFractionThroughTruncation(const integerPart *parts,
354 unsigned int partCount,
359 lsb = APInt::tcLSB(parts, partCount);
361 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
363 return lfExactlyZero;
365 return lfExactlyHalf;
366 if (bits <= partCount * integerPartWidth &&
367 APInt::tcExtractBit(parts, bits - 1))
368 return lfMoreThanHalf;
370 return lfLessThanHalf;
373 /* Shift DST right BITS bits noting lost fraction. */
375 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
377 lostFraction lost_fraction;
379 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
381 APInt::tcShiftRight(dst, parts, bits);
383 return lost_fraction;
386 /* Combine the effect of two lost fractions. */
388 combineLostFractions(lostFraction moreSignificant,
389 lostFraction lessSignificant)
391 if (lessSignificant != lfExactlyZero) {
392 if (moreSignificant == lfExactlyZero)
393 moreSignificant = lfLessThanHalf;
394 else if (moreSignificant == lfExactlyHalf)
395 moreSignificant = lfMoreThanHalf;
398 return moreSignificant;
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUE1 and HUE2 half-ulps, is strictly less
   than the returned value.

   INEXACTMULTIPLY is true when the multiplication itself was rounded.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  if (HUerr1 + HUerr2 == 0)
    return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
  else
    return inexactMultiply + 2 * (HUerr1 + HUerr2);
}
419 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
420 when the least significant BITS are truncated. BITS cannot be
423 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
425 unsigned int count, partBits;
426 integerPart part, boundary;
431 count = bits / integerPartWidth;
432 partBits = bits % integerPartWidth + 1;
434 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
437 boundary = (integerPart) 1 << (partBits - 1);
442 if (part - boundary <= boundary - part)
443 return part - boundary;
445 return boundary - part;
448 if (part == boundary) {
451 return ~(integerPart) 0; /* A lot. */
454 } else if (part == boundary - 1) {
457 return ~(integerPart) 0; /* A lot. */
462 return ~(integerPart) 0; /* A lot. */
465 /* Place pow(5, power) in DST, and return the number of parts used.
466 DST must be at least one part larger than size of the answer. */
468 powerOf5(integerPart *dst, unsigned int power)
470 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
472 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
473 pow5s[0] = 78125 * 5;
475 unsigned int partsCount[16] = { 1 };
476 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
478 assert(power <= maxExponent);
483 *p1 = firstEightPowers[power & 7];
489 for (unsigned int n = 0; power; power >>= 1, n++) {
494 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
496 pc = partsCount[n - 1];
497 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
499 if (pow5[pc - 1] == 0)
507 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
509 if (p2[result - 1] == 0)
512 /* Now result is in p1 with partsCount parts and p2 is scratch
514 tmp = p1, p1 = p2, p2 = tmp;
521 APInt::tcAssign(dst, p1, result);
526 /* Zero at the end to avoid modular arithmetic when adding one; used
527 when rounding up during hexadecimal output. */
528 static const char hexDigitsLower[] = "0123456789abcdef0";
529 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
530 static const char infinityL[] = "infinity";
531 static const char infinityU[] = "INFINITY";
532 static const char NaNL[] = "nan";
533 static const char NaNU[] = "NAN";
535 /* Write out an integerPart in hexadecimal, starting with the most
536 significant nibble. Write out exactly COUNT hexdigits, return
539 partAsHex (char *dst, integerPart part, unsigned int count,
540 const char *hexDigitChars)
542 unsigned int result = count;
544 assert(count != 0 && count <= integerPartWidth / 4);
546 part >>= (integerPartWidth - 4 * count);
548 dst[count] = hexDigitChars[part & 0xf];
555 /* Write out an unsigned decimal integer. */
557 writeUnsignedDecimal (char *dst, unsigned int n)
573 /* Write out a signed decimal integer. */
575 writeSignedDecimal (char *dst, int value)
579 dst = writeUnsignedDecimal(dst, -(unsigned) value);
581 dst = writeUnsignedDecimal(dst, value);
588 APFloat::initialize(const fltSemantics *ourSemantics)
592 semantics = ourSemantics;
595 significand.parts = new integerPart[count];
599 APFloat::freeSignificand()
602 delete [] significand.parts;
606 APFloat::assign(const APFloat &rhs)
608 assert(semantics == rhs.semantics);
611 category = rhs.category;
612 exponent = rhs.exponent;
614 exponent2 = rhs.exponent2;
615 if (category == fcNormal || category == fcNaN)
616 copySignificand(rhs);
620 APFloat::copySignificand(const APFloat &rhs)
622 assert(category == fcNormal || category == fcNaN);
623 assert(rhs.partCount() >= partCount());
625 APInt::tcAssign(significandParts(), rhs.significandParts(),
629 /* Make this number a NaN, with an arbitrary but deterministic value
630 for the significand. If double or longer, this is a signalling NaN,
631 which may not be ideal. If float, this is QNaN(0). */
632 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
637 integerPart *significand = significandParts();
638 unsigned numParts = partCount();
640 // Set the significand bits to the fill.
641 if (!fill || fill->getNumWords() < numParts)
642 APInt::tcSet(significand, 0, numParts);
644 APInt::tcAssign(significand, fill->getRawData(),
645 std::min(fill->getNumWords(), numParts));
647 // Zero out the excess bits of the significand.
648 unsigned bitsToPreserve = semantics->precision - 1;
649 unsigned part = bitsToPreserve / 64;
650 bitsToPreserve %= 64;
651 significand[part] &= ((1ULL << bitsToPreserve) - 1);
652 for (part++; part != numParts; ++part)
653 significand[part] = 0;
656 unsigned QNaNBit = semantics->precision - 2;
659 // We always have to clear the QNaN bit to make it an SNaN.
660 APInt::tcClearBit(significand, QNaNBit);
662 // If there are no bits set in the payload, we have to set
663 // *something* to make it a NaN instead of an infinity;
664 // conventionally, this is the next bit down from the QNaN bit.
665 if (APInt::tcIsZero(significand, numParts))
666 APInt::tcSetBit(significand, QNaNBit - 1);
668 // We always have to set the QNaN bit to make it a QNaN.
669 APInt::tcSetBit(significand, QNaNBit);
672 // For x87 extended precision, we want to make a NaN, not a
673 // pseudo-NaN. Maybe we should expose the ability to make
675 if (semantics == &APFloat::x87DoubleExtended)
676 APInt::tcSetBit(significand, QNaNBit + 1);
679 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
681 APFloat value(Sem, uninitialized);
682 value.makeNaN(SNaN, Negative, fill);
687 APFloat::operator=(const APFloat &rhs)
690 if (semantics != rhs.semantics) {
692 initialize(rhs.semantics);
701 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
704 if (semantics != rhs.semantics ||
705 category != rhs.category ||
708 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
711 if (category==fcZero || category==fcInfinity)
713 else if (category==fcNormal && exponent!=rhs.exponent)
715 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
716 exponent2!=rhs.exponent2)
720 const integerPart* p=significandParts();
721 const integerPart* q=rhs.significandParts();
722 for (; i>0; i--, p++, q++) {
730 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
731 : exponent2(0), sign2(0) {
732 assertArithmeticOK(ourSemantics);
733 initialize(&ourSemantics);
736 exponent = ourSemantics.precision - 1;
737 significandParts()[0] = value;
738 normalize(rmNearestTiesToEven, lfExactlyZero);
741 APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
742 assertArithmeticOK(ourSemantics);
743 initialize(&ourSemantics);
748 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
749 : exponent2(0), sign2(0) {
750 assertArithmeticOK(ourSemantics);
751 // Allocates storage if necessary but does not initialize it.
752 initialize(&ourSemantics);
755 APFloat::APFloat(const fltSemantics &ourSemantics,
756 fltCategory ourCategory, bool negative)
757 : exponent2(0), sign2(0) {
758 assertArithmeticOK(ourSemantics);
759 initialize(&ourSemantics);
760 category = ourCategory;
762 if (category == fcNormal)
764 else if (ourCategory == fcNaN)
768 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
769 : exponent2(0), sign2(0) {
770 assertArithmeticOK(ourSemantics);
771 initialize(&ourSemantics);
772 convertFromString(text, rmNearestTiesToEven);
775 APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
776 initialize(rhs.semantics);
785 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
786 void APFloat::Profile(FoldingSetNodeID& ID) const {
787 ID.Add(bitcastToAPInt());
791 APFloat::partCount() const
793 return partCountForBits(semantics->precision + 1);
797 APFloat::semanticsPrecision(const fltSemantics &semantics)
799 return semantics.precision;
803 APFloat::significandParts() const
805 return const_cast<APFloat *>(this)->significandParts();
809 APFloat::significandParts()
811 assert(category == fcNormal || category == fcNaN);
814 return significand.parts;
816 return &significand.part;
820 APFloat::zeroSignificand()
823 APInt::tcSet(significandParts(), 0, partCount());
826 /* Increment an fcNormal floating point number's significand. */
828 APFloat::incrementSignificand()
832 carry = APInt::tcIncrement(significandParts(), partCount());
834 /* Our callers should never cause us to overflow. */
839 /* Add the significand of the RHS. Returns the carry flag. */
841 APFloat::addSignificand(const APFloat &rhs)
845 parts = significandParts();
847 assert(semantics == rhs.semantics);
848 assert(exponent == rhs.exponent);
850 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
853 /* Subtract the significand of the RHS with a borrow flag. Returns
856 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
860 parts = significandParts();
862 assert(semantics == rhs.semantics);
863 assert(exponent == rhs.exponent);
865 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
869 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
870 on to the full-precision result of the multiplication. Returns the
873 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
875 unsigned int omsb; // One, not zero, based MSB.
876 unsigned int partsCount, newPartsCount, precision;
877 integerPart *lhsSignificand;
878 integerPart scratch[4];
879 integerPart *fullSignificand;
880 lostFraction lost_fraction;
883 assert(semantics == rhs.semantics);
885 precision = semantics->precision;
886 newPartsCount = partCountForBits(precision * 2);
888 if (newPartsCount > 4)
889 fullSignificand = new integerPart[newPartsCount];
891 fullSignificand = scratch;
893 lhsSignificand = significandParts();
894 partsCount = partCount();
896 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
897 rhs.significandParts(), partsCount, partsCount);
899 lost_fraction = lfExactlyZero;
900 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
901 exponent += rhs.exponent;
904 Significand savedSignificand = significand;
905 const fltSemantics *savedSemantics = semantics;
906 fltSemantics extendedSemantics;
908 unsigned int extendedPrecision;
910 /* Normalize our MSB. */
911 extendedPrecision = precision + precision - 1;
912 if (omsb != extendedPrecision) {
913 APInt::tcShiftLeft(fullSignificand, newPartsCount,
914 extendedPrecision - omsb);
915 exponent -= extendedPrecision - omsb;
918 /* Create new semantics. */
919 extendedSemantics = *semantics;
920 extendedSemantics.precision = extendedPrecision;
922 if (newPartsCount == 1)
923 significand.part = fullSignificand[0];
925 significand.parts = fullSignificand;
926 semantics = &extendedSemantics;
928 APFloat extendedAddend(*addend);
929 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
930 assert(status == opOK);
932 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
934 /* Restore our state. */
935 if (newPartsCount == 1)
936 fullSignificand[0] = significand.part;
937 significand = savedSignificand;
938 semantics = savedSemantics;
940 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
943 exponent -= (precision - 1);
945 if (omsb > precision) {
946 unsigned int bits, significantParts;
949 bits = omsb - precision;
950 significantParts = partCountForBits(omsb);
951 lf = shiftRight(fullSignificand, significantParts, bits);
952 lost_fraction = combineLostFractions(lf, lost_fraction);
956 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
958 if (newPartsCount > 4)
959 delete [] fullSignificand;
961 return lost_fraction;
964 /* Divide the significand of the LHS by that of the RHS, leaving the quotient in the LHS significand.  Returns the lost fraction. */
966 APFloat::divideSignificand(const APFloat &rhs)
968 unsigned int bit, i, partsCount;
969 const integerPart *rhsSignificand;
970 integerPart *lhsSignificand, *dividend, *divisor;
971 integerPart scratch[4];
972 lostFraction lost_fraction;
974 assert(semantics == rhs.semantics);
976 lhsSignificand = significandParts();
977 rhsSignificand = rhs.significandParts();
978 partsCount = partCount();
981 dividend = new integerPart[partsCount * 2];
985 divisor = dividend + partsCount;
987 /* Copy the dividend and divisor as they will be modified in-place. */
988 for (i = 0; i < partsCount; i++) {
989 dividend[i] = lhsSignificand[i];
990 divisor[i] = rhsSignificand[i];
991 lhsSignificand[i] = 0;
994 exponent -= rhs.exponent;
996 unsigned int precision = semantics->precision;
998 /* Normalize the divisor. */
999 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1002 APInt::tcShiftLeft(divisor, partsCount, bit);
1005 /* Normalize the dividend. */
1006 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1009 APInt::tcShiftLeft(dividend, partsCount, bit);
1012 /* Ensure the dividend >= divisor initially for the loop below.
1013 Incidentally, this means that the division loop below is
1014 guaranteed to set the integer bit to one. */
1015 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1017 APInt::tcShiftLeft(dividend, partsCount, 1);
1018 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1021 /* Long division. */
1022 for (bit = precision; bit; bit -= 1) {
1023 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1024 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1025 APInt::tcSetBit(lhsSignificand, bit - 1);
1028 APInt::tcShiftLeft(dividend, partsCount, 1);
1031 /* Figure out the lost fraction. */
1032 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1035 lost_fraction = lfMoreThanHalf;
1037 lost_fraction = lfExactlyHalf;
1038 else if (APInt::tcIsZero(dividend, partsCount))
1039 lost_fraction = lfExactlyZero;
1041 lost_fraction = lfLessThanHalf;
1046 return lost_fraction;
1050 APFloat::significandMSB() const
1052 return APInt::tcMSB(significandParts(), partCount());
1056 APFloat::significandLSB() const
1058 return APInt::tcLSB(significandParts(), partCount());
1061 /* Note that a zero result is NOT normalized to fcZero. */
1063 APFloat::shiftSignificandRight(unsigned int bits)
1065 /* Our exponent should not overflow. */
1066 assert((exponent_t) (exponent + bits) >= exponent);
1070 return shiftRight(significandParts(), partCount(), bits);
1073 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1075 APFloat::shiftSignificandLeft(unsigned int bits)
1077 assert(bits < semantics->precision);
1080 unsigned int partsCount = partCount();
1082 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1085 assert(!APInt::tcIsZero(significandParts(), partsCount));
1090 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1094 assert(semantics == rhs.semantics);
1095 assert(category == fcNormal);
1096 assert(rhs.category == fcNormal);
1098 compare = exponent - rhs.exponent;
1100 /* If exponents are equal, do an unsigned bignum comparison of the
1103 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1107 return cmpGreaterThan;
1108 else if (compare < 0)
1114 /* Handle overflow. Sign is preserved. We either become infinity or
1115 the largest finite number. */
1117 APFloat::handleOverflow(roundingMode rounding_mode)
1120 if (rounding_mode == rmNearestTiesToEven ||
1121 rounding_mode == rmNearestTiesToAway ||
1122 (rounding_mode == rmTowardPositive && !sign) ||
1123 (rounding_mode == rmTowardNegative && sign)) {
1124 category = fcInfinity;
1125 return (opStatus) (opOverflow | opInexact);
1128 /* Otherwise we become the largest finite number. */
1129 category = fcNormal;
1130 exponent = semantics->maxExponent;
1131 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1132 semantics->precision);
1137 /* Returns TRUE if, when truncating the current number, with BIT the
1138 new LSB, with the given lost fraction and rounding mode, the result
1139 would need to be rounded away from zero (i.e., by increasing the
1140 signficand). This routine must work for fcZero of both signs, and
1141 fcNormal numbers. */
1143 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1144 lostFraction lost_fraction,
1145 unsigned int bit) const
1147 /* NaNs and infinities should not have lost fractions. */
1148 assert(category == fcNormal || category == fcZero);
1150 /* Current callers never pass this so we don't handle it. */
1151 assert(lost_fraction != lfExactlyZero);
1153 switch (rounding_mode) {
1154 case rmNearestTiesToAway:
1155 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1157 case rmNearestTiesToEven:
1158 if (lost_fraction == lfMoreThanHalf)
1161 /* Our zeroes don't have a significand to test. */
1162 if (lost_fraction == lfExactlyHalf && category != fcZero)
1163 return APInt::tcExtractBit(significandParts(), bit);
1170 case rmTowardPositive:
1171 return sign == false;
1173 case rmTowardNegative:
1174 return sign == true;
1176 llvm_unreachable("Invalid rounding mode found");
1180 APFloat::normalize(roundingMode rounding_mode,
1181 lostFraction lost_fraction)
1183 unsigned int omsb; /* One, not zero, based MSB. */
1186 if (category != fcNormal)
1189 /* Before rounding normalize the exponent of fcNormal numbers. */
1190 omsb = significandMSB() + 1;
1193 /* OMSB is numbered from 1. We want to place it in the integer
1194 bit numbered PRECISION if possible, with a compensating change in
1196 exponentChange = omsb - semantics->precision;
1198 /* If the resulting exponent is too high, overflow according to
1199 the rounding mode. */
1200 if (exponent + exponentChange > semantics->maxExponent)
1201 return handleOverflow(rounding_mode);
1203 /* Subnormal numbers have exponent minExponent, and their MSB
1204 is forced based on that. */
1205 if (exponent + exponentChange < semantics->minExponent)
1206 exponentChange = semantics->minExponent - exponent;
1208 /* Shifting left is easy as we don't lose precision. */
1209 if (exponentChange < 0) {
1210 assert(lost_fraction == lfExactlyZero);
1212 shiftSignificandLeft(-exponentChange);
1217 if (exponentChange > 0) {
1220 /* Shift right and capture any new lost fraction. */
1221 lf = shiftSignificandRight(exponentChange);
1223 lost_fraction = combineLostFractions(lf, lost_fraction);
1225 /* Keep OMSB up-to-date. */
1226 if (omsb > (unsigned) exponentChange)
1227 omsb -= exponentChange;
1233 /* Now round the number according to rounding_mode given the lost
1236 /* As specified in IEEE 754, since we do not trap we do not report
1237 underflow for exact results. */
1238 if (lost_fraction == lfExactlyZero) {
1239 /* Canonicalize zeroes. */
1246 /* Increment the significand if we're rounding away from zero. */
1247 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1249 exponent = semantics->minExponent;
1251 incrementSignificand();
1252 omsb = significandMSB() + 1;
1254 /* Did the significand increment overflow? */
1255 if (omsb == (unsigned) semantics->precision + 1) {
1256 /* Renormalize by incrementing the exponent and shifting our
1257 significand right one. However if we already have the
1258 maximum exponent we overflow to infinity. */
1259 if (exponent == semantics->maxExponent) {
1260 category = fcInfinity;
1262 return (opStatus) (opOverflow | opInexact);
1265 shiftSignificandRight(1);
1271 /* The normal case - we were and are not denormal, and any
1272 significand increment above didn't overflow. */
1273 if (omsb == semantics->precision)
1276 /* We have a non-zero denormal. */
1277 assert(omsb < semantics->precision);
1279 /* Canonicalize zeroes. */
1283 /* The fcZero case is a denormal that underflowed to zero. */
1284 return (opStatus) (opUnderflow | opInexact);
1288 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1290 switch (convolve(category, rhs.category)) {
1292 llvm_unreachable(0);
1294 case convolve(fcNaN, fcZero):
1295 case convolve(fcNaN, fcNormal):
1296 case convolve(fcNaN, fcInfinity):
1297 case convolve(fcNaN, fcNaN):
1298 case convolve(fcNormal, fcZero):
1299 case convolve(fcInfinity, fcNormal):
1300 case convolve(fcInfinity, fcZero):
1303 case convolve(fcZero, fcNaN):
1304 case convolve(fcNormal, fcNaN):
1305 case convolve(fcInfinity, fcNaN):
1307 copySignificand(rhs);
1310 case convolve(fcNormal, fcInfinity):
1311 case convolve(fcZero, fcInfinity):
1312 category = fcInfinity;
1313 sign = rhs.sign ^ subtract;
1316 case convolve(fcZero, fcNormal):
1318 sign = rhs.sign ^ subtract;
1321 case convolve(fcZero, fcZero):
1322 /* Sign depends on rounding mode; handled by caller. */
1325 case convolve(fcInfinity, fcInfinity):
1326 /* Differently signed infinities can only be validly
1328 if (((sign ^ rhs.sign)!=0) != subtract) {
1335 case convolve(fcNormal, fcNormal):
1340 /* Add or subtract two normal numbers. */
1342 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1345 lostFraction lost_fraction;
1348 /* Determine if the operation on the absolute values is effectively
1349 an addition or subtraction. */
1350 subtract ^= (sign ^ rhs.sign) ? true : false;
1352 /* Are we bigger exponent-wise than the RHS? */
1353 bits = exponent - rhs.exponent;
1355 /* Subtraction is more subtle than one might naively expect. */
1357 APFloat temp_rhs(rhs);
1361 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1362 lost_fraction = lfExactlyZero;
1363 } else if (bits > 0) {
1364 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1365 shiftSignificandLeft(1);
1368 lost_fraction = shiftSignificandRight(-bits - 1);
1369 temp_rhs.shiftSignificandLeft(1);
1374 carry = temp_rhs.subtractSignificand
1375 (*this, lost_fraction != lfExactlyZero);
1376 copySignificand(temp_rhs);
1379 carry = subtractSignificand
1380 (temp_rhs, lost_fraction != lfExactlyZero);
1383 /* Invert the lost fraction - it was on the RHS and
1385 if (lost_fraction == lfLessThanHalf)
1386 lost_fraction = lfMoreThanHalf;
1387 else if (lost_fraction == lfMoreThanHalf)
1388 lost_fraction = lfLessThanHalf;
1390 /* The code above is intended to ensure that no borrow is
1396 APFloat temp_rhs(rhs);
1398 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1399 carry = addSignificand(temp_rhs);
1401 lost_fraction = shiftSignificandRight(-bits);
1402 carry = addSignificand(rhs);
1405 /* We have a guard bit; generating a carry cannot happen. */
1410 return lost_fraction;
// multiplySpecials: classify a multiplication by the categories of the two
// operands. convolve(a, b) packs the pair into one switch key (a * 4 + b,
// see the macro near the top of the file).
//   rhs - the multiplier; the visible statements read its category and,
//         for a NaN, its significand.
// NOTE(review): the statements that close each case group (status returns
// and some category assignments) are not visible in this excerpt.
1414 APFloat::multiplySpecials(const APFloat &rhs)
1416 switch (convolve(category, rhs.category)) {
1418 llvm_unreachable(0);
// The left operand is a NaN.
1420 case convolve(fcNaN, fcZero):
1421 case convolve(fcNaN, fcNormal):
1422 case convolve(fcNaN, fcInfinity):
1423 case convolve(fcNaN, fcNaN):
// Only the right operand is a NaN: take over its significand.
1426 case convolve(fcZero, fcNaN):
1427 case convolve(fcNormal, fcNaN):
1428 case convolve(fcInfinity, fcNaN):
1430 copySignificand(rhs);
// An infinity times a normal number or another infinity is an infinity.
1433 case convolve(fcNormal, fcInfinity):
1434 case convolve(fcInfinity, fcNormal):
1435 case convolve(fcInfinity, fcInfinity):
1436 category = fcInfinity;
// At least one zero factor, the other being zero or normal.
1439 case convolve(fcZero, fcNormal):
1440 case convolve(fcNormal, fcZero):
1441 case convolve(fcZero, fcZero):
// Zero times infinity.
1445 case convolve(fcZero, fcInfinity):
1446 case convolve(fcInfinity, fcZero):
// Two normal operands: not a special case.
1450 case convolve(fcNormal, fcNormal):
// divideSpecials: classify a division (this / rhs) by the categories of the
// two operands, using the same convolve(a, b) switch key (a * 4 + b) as the
// other *Specials helpers.
//   rhs - the divisor; the visible statements read its category and, for a
//         NaN, its significand.
// NOTE(review): the statements that close each case group are mostly not
// visible in this excerpt, so the outcome of a group is only described
// where a visible statement shows it.
1456 APFloat::divideSpecials(const APFloat &rhs)
1458 switch (convolve(category, rhs.category)) {
1460 llvm_unreachable(0);
// One shared group: a NaN dividend, an infinite dividend over zero or
// normal, and a zero dividend over infinity or normal.
1462 case convolve(fcNaN, fcZero):
1463 case convolve(fcNaN, fcNormal):
1464 case convolve(fcNaN, fcInfinity):
1465 case convolve(fcNaN, fcNaN):
1466 case convolve(fcInfinity, fcZero):
1467 case convolve(fcInfinity, fcNormal):
1468 case convolve(fcZero, fcInfinity):
1469 case convolve(fcZero, fcNormal):
// Only the divisor is a NaN: take over its significand.
1472 case convolve(fcZero, fcNaN):
1473 case convolve(fcNormal, fcNaN):
1474 case convolve(fcInfinity, fcNaN):
1476 copySignificand(rhs);
// Normal divided by infinity.
1479 case convolve(fcNormal, fcInfinity):
// Normal divided by zero becomes an infinity.
1483 case convolve(fcNormal, fcZero):
1484 category = fcInfinity;
// Infinity over infinity and zero over zero.
1487 case convolve(fcInfinity, fcInfinity):
1488 case convolve(fcZero, fcZero):
// Two normal operands: not a special case.
1492 case convolve(fcNormal, fcNormal):
// modSpecials: classify a modulus operation (this mod rhs) by the categories
// of the two operands, using the convolve(a, b) switch key (a * 4 + b).
//   rhs - the divisor; the visible statements read its category and, for a
//         NaN, its significand.
// NOTE(review): the statements that close each case group are not visible
// in this excerpt; only the grouping itself is documented here.
1498 APFloat::modSpecials(const APFloat &rhs)
1500 switch (convolve(category, rhs.category)) {
1502 llvm_unreachable(0);
// One shared group: a NaN dividend, a zero dividend with an infinite or
// normal divisor, and a normal dividend with an infinite divisor.
1504 case convolve(fcNaN, fcZero):
1505 case convolve(fcNaN, fcNormal):
1506 case convolve(fcNaN, fcInfinity):
1507 case convolve(fcNaN, fcNaN):
1508 case convolve(fcZero, fcInfinity):
1509 case convolve(fcZero, fcNormal):
1510 case convolve(fcNormal, fcInfinity):
// Only the divisor is a NaN: take over its significand.
1513 case convolve(fcZero, fcNaN):
1514 case convolve(fcNormal, fcNaN):
1515 case convolve(fcInfinity, fcNaN):
1517 copySignificand(rhs);
// A zero divisor, or an infinite dividend.
1520 case convolve(fcNormal, fcZero):
1521 case convolve(fcInfinity, fcZero):
1522 case convolve(fcInfinity, fcNormal):
1523 case convolve(fcInfinity, fcInfinity):
1524 case convolve(fcZero, fcZero):
// Two normal operands: not a special case.
1528 case convolve(fcNormal, fcNormal):
// changeSign: takes no arguments. The statement forming its body is not
// visible in this excerpt; presumably it inverts the sign member - TODO
// confirm against the full file.
1535 APFloat::changeSign()
1537 /* Look mummy, this one's easy. */
// clearSign: takes no arguments. The statement forming its body is not
// visible in this excerpt; presumably it resets the sign member to
// non-negative - TODO confirm against the full file.
1542 APFloat::clearSign()
1544 /* So is this one. */
// copySign: takes the value rhs by const reference. The body is not visible
// in this excerpt; presumably it copies the sign of rhs into this value -
// TODO confirm against the full file.
1549 APFloat::copySign(const APFloat &rhs)
// addOrSubtract: shared implementation behind add and subtract.
//   rhs           - right-hand operand.
//   rounding_mode - rounding applied when the result is normalized.
//   subtract      - flag forwarded to the helpers below (its declaration in
//                   the parameter list is not visible in this excerpt; add
//                   passes false and subtract passes true).
// The special-category cases are tried first; when that helper answers
// opDivByZero, which is used here as a marker for the general case, the
// significands are combined and the result is normalized.
1555 /* Normalized addition or subtraction. */
1557 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1562 assertArithmeticOK(*semantics);
1564 fs = addOrSubtractSpecials(rhs, subtract);
1566 /* This return code means it was not a simple case. */
1567 if (fs == opDivByZero) {
1568 lostFraction lost_fraction;
1570 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1571 fs = normalize(rounding_mode, lost_fraction);
1573 /* Can only be zero if we lost no fraction. */
1574 assert(category != fcZero || lost_fraction == lfExactlyZero);
1577 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1578 positive zero unless rounding to minus infinity, except that
1579 adding two like-signed zeroes gives that zero. */
1580 if (category == fcZero) {
// The zero keeps its sign only when rhs is a zero as well and the two
// zeroes are effectively like-signed for this operation; otherwise the
// sign is negative exactly when rounding toward negative.
1581 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1582 sign = (rounding_mode == rmTowardNegative);
// add: adds rhs to this value under rounding_mode by forwarding to
// addOrSubtract with the subtract flag set to false, and returns the status
// that call reports.
1588 /* Normalized addition. */
1590 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1592 return addOrSubtract(rhs, rounding_mode, false);
// subtract: subtracts rhs from this value under rounding_mode by forwarding
// to addOrSubtract with the subtract flag set to true, and returns the
// status that call reports.
1595 /* Normalized subtraction. */
1597 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1599 return addOrSubtract(rhs, rounding_mode, true);
// multiply: multiplies this value by rhs under rounding_mode.
// multiplySpecials handles the special categories first; if the value is
// still fcNormal afterwards, the significands are multiplied, the result is
// normalized, and opInexact is added to the status whenever the significand
// product lost a fraction.
// NOTE(review): some short lines (for example the final return) are not
// visible in this excerpt.
1602 /* Normalized multiply. */
1604 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1608 assertArithmeticOK(*semantics);
1610 fs = multiplySpecials(rhs);
1612 if (category == fcNormal) {
// The second argument is the optional addend; none is supplied here.
1613 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1614 fs = normalize(rounding_mode, lost_fraction);
1615 if (lost_fraction != lfExactlyZero)
1616 fs = (opStatus) (fs | opInexact);
// divide: divides this value by rhs under rounding_mode. divideSpecials
// handles the special categories first; if the value is still fcNormal
// afterwards, the significands are divided, the result is normalized, and
// opInexact is added to the status whenever the quotient lost a fraction.
// NOTE(review): some short lines (for example the final return) are not
// visible in this excerpt.
1622 /* Normalized divide. */
1624 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1628 assertArithmeticOK(*semantics);
1630 fs = divideSpecials(rhs);
1632 if (category == fcNormal) {
1633 lostFraction lost_fraction = divideSignificand(rhs);
1634 fs = normalize(rounding_mode, lost_fraction);
1635 if (lost_fraction != lfExactlyZero)
1636 fs = (opStatus) (fs | opInexact);
// remainder: computes this - rhs * n, where n is the quotient this / rhs
// rounded to an integer with round-to-nearest-even. The work happens in the
// temporary V, whose declaration is not visible in this excerpt. The sign
// of the original value is restored on the result at the end.
// NOTE(review): the integer buffer x is allocated with new[]; the exit
// taken when convertToInteger reports opInvalidOp is not fully visible
// here and should be checked to release x before returning.
1642 /* Normalized remainder. This is not currently correct in all cases. */
1644 APFloat::remainder(const APFloat &rhs)
1648 unsigned int origSign = sign;
1650 assertArithmeticOK(*semantics);
// Step 1: quotient V / rhs.
1651 fs = V.divide(rhs, rmNearestTiesToEven);
1652 if (fs == opDivByZero)
1655 int parts = partCount();
1656 integerPart *x = new integerPart[parts];
// Step 2: round the quotient to a signed integer and convert it back.
1658 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1659 rmNearestTiesToEven, &ignored);
1660 if (fs==opInvalidOp)
1663 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1664 rmNearestTiesToEven);
1665 assert(fs==opOK); // should always work
// Step 3: multiply the integral quotient by rhs and subtract from this.
1667 fs = V.multiply(rhs, rmNearestTiesToEven);
1668 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1670 fs = subtract(V, rmNearestTiesToEven);
1671 assert(fs==opOK || fs==opInexact); // likewise
1674 sign = origSign; // IEEE754 requires this
// mod: computes this - rhs * n, where n is the quotient this / rhs
// truncated toward zero. modSpecials handles the special categories; the
// arithmetic below only runs when both operands are fcNormal. The quotient
// is formed with round-to-nearest-even, while the final multiply and
// subtract use the caller-supplied rounding_mode. The work happens in the
// temporary V, whose declaration is not visible in this excerpt.
// NOTE(review): the integer buffer x is allocated with new[]; the exit
// taken when convertToInteger reports opInvalidOp is not fully visible
// here and should be checked to release x before returning.
1679 /* Normalized llvm frem (C fmod).
1680 This is not currently correct in all cases. */
1682 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1685 assertArithmeticOK(*semantics);
1686 fs = modSpecials(rhs);
1688 if (category == fcNormal && rhs.category == fcNormal) {
1690 unsigned int origSign = sign;
// Step 1: quotient V / rhs.
1692 fs = V.divide(rhs, rmNearestTiesToEven);
1693 if (fs == opDivByZero)
1696 int parts = partCount();
1697 integerPart *x = new integerPart[parts];
// Step 2: truncate the quotient to a signed integer and convert it back.
1699 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1700 rmTowardZero, &ignored);
1701 if (fs==opInvalidOp)
1704 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1705 rmNearestTiesToEven);
1706 assert(fs==opOK); // should always work
// Step 3: multiply the integral quotient by rhs and subtract from this.
1708 fs = V.multiply(rhs, rounding_mode);
1709 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1711 fs = subtract(V, rounding_mode);
1712 assert(fs==opOK || fs==opInexact); // likewise
1715 sign = origSign; // IEEE754 requires this
// fusedMultiplyAdd: computes this * multiplicand + addend with a single
// rounding under rounding_mode.
//   multiplicand - second factor of the product.
//   addend       - value added to the exact product.
// When all three operands are fcNormal the product and sum are formed in
// extended precision by multiplySignificand and rounded once by normalize.
// Otherwise the product is classified by multiplySpecials and the addend is
// applied with an ordinary addOrSubtract.
// NOTE(review): some short lines (declarations, braces, the final return)
// are not visible in this excerpt.
1721 /* Normalized fused-multiply-add. */
1723 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1724 const APFloat &addend,
1725 roundingMode rounding_mode)
1729 assertArithmeticOK(*semantics);
1731 /* Post-multiplication sign, before addition. */
1732 sign ^= multiplicand.sign;
1734 /* If and only if all arguments are normal do we need to do an
1735 extended-precision calculation. */
1736 if (category == fcNormal &&
1737 multiplicand.category == fcNormal &&
1738 addend.category == fcNormal) {
1739 lostFraction lost_fraction;
1741 lost_fraction = multiplySignificand(multiplicand, &addend);
1742 fs = normalize(rounding_mode, lost_fraction);
1743 if (lost_fraction != lfExactlyZero)
1744 fs = (opStatus) (fs | opInexact);
1746 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1747 positive zero unless rounding to minus infinity, except that
1748 adding two like-signed zeroes gives that zero. */
// The rule above is only for an exact zero sum. A result that became zero
// by underflow was a non-zero value before rounding, so it must keep the
// sign normalize left in place; hence the opUnderflow guard.
1749 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1750 sign = (rounding_mode == rmTowardNegative);
1752 fs = multiplySpecials(multiplicand);
1754 /* FS can only be opOK or opInvalidOp. There is no more work
1755 to do in the latter case. The IEEE-754R standard says it is
1756 implementation-defined in this case whether, if ADDEND is a
1757 quiet NaN, we raise invalid op; this implementation does so.
1759 If we need to do the addition we can do so with normal
1762 fs = addOrSubtract(addend, rounding_mode, false);
// roundToIntegral: rounds this value to an integral value in place, using
// rounding_mode, and returns the status of the last arithmetic step that
// was executed.
// NOTE(review): the magic constant built below is always positive (it is
// converted as an unsigned integer); the behaviour for negative inputs
// should be confirmed. Some short lines (the declaration of fs, the early
// returns) are not visible in this excerpt.
1768 /* Rounding-mode correct round to integral value. */
1769 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1771 assertArithmeticOK(*semantics);
1773 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1774 // precision of our format, and then subtract it back off again. The choice
1775 // of rounding modes for the addition/subtraction determines the rounding mode
1776 // for our integral rounding as well.
// Build 2^(p-1) by shifting inside the APInt. Shifting the int literal 1
// would overflow for every format whose precision exceeds the width of
// int (double, x87 extended, quad).
1777 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1778 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1779 APFloat MagicConstant(*semantics);
1780 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1781 rmNearestTiesToEven);
1785 fs = add(MagicConstant, rounding_mode);
1786 if (fs != opOK && fs != opInexact)
1789 fs = subtract(MagicConstant, rounding_mode);
// compare: orders this value against rhs and yields one of the cmpResult
// values (cmpUnordered, cmpLessThan, cmpGreaterThan are the ones visible
// here). Both operands must share the same semantics, which is asserted.
// The category pair is dispatched through convolve(a, b) = a * 4 + b.
// NOTE(review): many short lines (sign tests, alternative returns, the
// final return) are not visible in this excerpt; a visible return inside a
// case group may be only one arm of a hidden if/else.
1794 /* Comparison requires normalized numbers. */
1796 APFloat::compare(const APFloat &rhs) const
1800 assertArithmeticOK(*semantics);
1801 assert(semantics == rhs.semantics);
1803 switch (convolve(category, rhs.category)) {
1805 llvm_unreachable(0);
// Any NaN operand makes the comparison unordered.
1807 case convolve(fcNaN, fcZero):
1808 case convolve(fcNaN, fcNormal):
1809 case convolve(fcNaN, fcInfinity):
1810 case convolve(fcNaN, fcNaN):
1811 case convolve(fcZero, fcNaN):
1812 case convolve(fcNormal, fcNaN):
1813 case convolve(fcInfinity, fcNaN):
1814 return cmpUnordered;
// The left operand has the larger magnitude class.
1816 case convolve(fcInfinity, fcNormal):
1817 case convolve(fcInfinity, fcZero):
1818 case convolve(fcNormal, fcZero):
1822 return cmpGreaterThan;
// The right operand has the larger magnitude class.
1824 case convolve(fcNormal, fcInfinity):
1825 case convolve(fcZero, fcInfinity):
1826 case convolve(fcZero, fcNormal):
1828 return cmpGreaterThan;
// Two infinities: the signs decide.
1832 case convolve(fcInfinity, fcInfinity):
1833 if (sign == rhs.sign)
1838 return cmpGreaterThan;
1840 case convolve(fcZero, fcZero):
1843 case convolve(fcNormal, fcNormal):
1847 /* Two normal numbers. Do they have the same sign? */
1848 if (sign != rhs.sign) {
1850 result = cmpLessThan;
1852 result = cmpGreaterThan;
1854 /* Compare absolute values; invert result if negative. */
1855 result = compareAbsoluteValue(rhs);
// Swap less-than and greater-than; other results are left unchanged.
1858 if (result == cmpLessThan)
1859 result = cmpGreaterThan;
1860 else if (result == cmpGreaterThan)
1861 result = cmpLessThan;
1868 /// APFloat::convert - convert a value of one floating point type to another.
1869 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1870 /// records whether the transformation lost information, i.e. whether
1871 /// converting the result back to the original type will produce the
1872 /// original value (this is almost the same as return value==opOK, but there
1873 /// are edge cases where this is not so).
// Parameters:
//   toSemantics   - the target format; becomes the semantics of this value.
//   rounding_mode - rounding used when a normal value is re-normalized.
//   losesInfo     - output flag, written in the fcNormal and fcNaN branches
//                   shown below (other branches are not visible here).
// NOTE(review): several short lines (declarations of shift and fs, braces,
// the release of the old storage, the final return) are not visible in
// this excerpt.
1876 APFloat::convert(const fltSemantics &toSemantics,
1877 roundingMode rounding_mode, bool *losesInfo)
1879 lostFraction lostFraction;
1880 unsigned int newPartCount, oldPartCount;
1883 const fltSemantics &fromSemantics = *semantics;
1885 assertArithmeticOK(fromSemantics);
1886 assertArithmeticOK(toSemantics);
1887 lostFraction = lfExactlyZero;
1888 newPartCount = partCountForBits(toSemantics.precision + 1);
1889 oldPartCount = partCount();
// Positive shift: the target is wider. Negative shift: bits are dropped.
1890 shift = toSemantics.precision - fromSemantics.precision;
1892 bool X86SpecialNan = false;
// Leaving x87 extended with a NaN whose two top significand bits are not
// both set.
1893 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1894 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1895 (!(*significandParts() & 0x8000000000000000ULL) ||
1896 !(*significandParts() & 0x4000000000000000ULL))) {
1897 // x86 has some unusual NaNs which cannot be represented in any other
1898 // format; note them here.
1899 X86SpecialNan = true;
1902 // If this is a truncation, perform the shift before we narrow the storage.
1903 if (shift < 0 && (category==fcNormal || category==fcNaN))
1904 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1906 // Fix the storage so it can hold the new value.
1907 if (newPartCount > oldPartCount) {
1908 // The new type requires more storage; make it available.
1909 integerPart *newParts;
1910 newParts = new integerPart[newPartCount];
1911 APInt::tcSet(newParts, 0, newPartCount);
1912 if (category==fcNormal || category==fcNaN)
1913 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1915 significand.parts = newParts;
1916 } else if (newPartCount == 1 && oldPartCount != 1) {
1917 // Switch to built-in storage for a single part.
1918 integerPart newPart = 0;
1919 if (category==fcNormal || category==fcNaN)
1920 newPart = significandParts()[0];
1922 significand.part = newPart;
1925 // Now that we have the right storage, switch the semantics.
1926 semantics = &toSemantics;
1928 // If this is an extension, perform the shift now that the storage is
1930 if (shift > 0 && (category==fcNormal || category==fcNaN))
1931 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1933 if (category == fcNormal) {
// Any status other than opOK from normalize counts as information loss.
1934 fs = normalize(rounding_mode, lostFraction);
1935 *losesInfo = (fs != opOK);
1936 } else if (category == fcNaN) {
// A NaN loses information if payload bits were shifted out or if it was
// one of the x87-only encodings detected above.
1937 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1938 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1939 // does not give you back the same bits. This is dubious, and we
1940 // don't currently do it. You're really supposed to get
1941 // an invalid operation signal at runtime, but nobody does that.
// Parameters of convertToSignExtendedInteger, as far as they are visible:
//   parts         - destination array; partCountForBits(width) parts are
//                   written.
//   width         - bit width of the destination integer.
//   isSigned      - used below although its declaration in the parameter
//                   list is not visible in this excerpt.
//   rounding_mode - rounding applied to the truncated fraction.
//   isExact       - output flag; the statements writing it are not visible.
// NOTE(review): many short lines (returns, braces, branch headers) are
// missing from this excerpt; added comments describe visible lines only.
1951 /* Convert a floating point number to an integer according to the
1952 rounding mode. If the rounded integer value is out of range this
1953 returns an invalid operation exception and the contents of the
1954 destination parts are unspecified. If the rounded value is in
1955 range but the floating point number is not the exact integer, the C
1956 standard doesn't require an inexact exception to be raised. IEEE
1957 854 does require it so we do that.
1959 Note that for conversions to integer type the C standard requires
1960 round-to-zero to always be used. */
1962 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1964 roundingMode rounding_mode,
1965 bool *isExact) const
1967 lostFraction lost_fraction;
1968 const integerPart *src;
1969 unsigned int dstPartsCount, truncatedBits;
1971 assertArithmeticOK(*semantics);
1975 /* Handle the three special cases first. */
1976 if (category == fcInfinity || category == fcNaN)
1979 dstPartsCount = partCountForBits(width);
1981 if (category == fcZero) {
1982 APInt::tcSet(parts, 0, dstPartsCount);
1983 // Negative zero can't be represented as an int.
1988 src = significandParts();
1990 /* Step 1: place our absolute value, with any fraction truncated, in
1993 /* Our absolute value is less than one; truncate everything. */
1994 APInt::tcSet(parts, 0, dstPartsCount);
1995 /* For exponent -1 the integer bit represents .5, look at that.
1996 For smaller exponents leftmost truncated bit is 0. */
1997 truncatedBits = semantics->precision -1U - exponent;
1999 /* We want the most significant (exponent + 1) bits; the rest are
2001 unsigned int bits = exponent + 1U;
2003 /* Hopelessly large in magnitude? */
2007 if (bits < semantics->precision) {
2008 /* We truncate (semantics->precision - bits) bits. */
2009 truncatedBits = semantics->precision - bits;
2010 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2012 /* We want at least as many bits as are available. */
// Copy the whole significand, then scale it up by the missing low bits.
2013 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2014 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2019 /* Step 2: work out any lost fraction, and increment the absolute
2020 value if we would round away from zero. */
2021 if (truncatedBits) {
2022 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2024 if (lost_fraction != lfExactlyZero &&
2025 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
// A carry out of the destination parts means the value does not fit.
2026 if (APInt::tcIncrement(parts, dstPartsCount))
2027 return opInvalidOp; /* Overflow. */
2030 lost_fraction = lfExactlyZero;
2033 /* Step 3: check if we fit in the destination. */
// omsb is the one-based position of the highest set bit of the magnitude.
2034 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2038 /* Negative numbers cannot be represented as unsigned. */
2042 /* It takes omsb bits to represent the unsigned integer value.
2043 We lose a bit for the sign, but care is needed as the
2044 maximally negative integer is a special case. */
2045 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2048 /* This case can happen because of rounding. */
2053 APInt::tcNegate (parts, dstPartsCount);
// One bit fewer is available for the magnitude when the target is signed.
2055 if (omsb >= width + !isSigned)
2059 if (lost_fraction == lfExactlyZero) {
// convertToInteger (raw parts overload): wraps
// convertToSignExtendedInteger and, when that call reports opInvalidOp,
// overwrites parts with a deterministic value.
//   parts, width  - destination array and its bit width.
//   isSigned      - used below although its declaration in the parameter
//                   list is not visible in this excerpt.
//   rounding_mode - forwarded to convertToSignExtendedInteger.
//   isExact       - output flag (the remaining arguments of the forwarded
//                   call are on a line not visible here).
// NOTE(review): the NaN branch and the branch for negative values in an
// unsigned target are not visible in this excerpt.
2066 /* Same as convertToSignExtendedInteger, except we provide
2067 deterministic values in case of an invalid operation exception,
2068 namely zero for NaNs and the minimal or maximal value respectively
2069 for underflow or overflow.
2070 The *isExact output tells whether the result is exact, in the sense
2071 that converting it back to the original floating point type produces
2072 the original value. This is almost equivalent to result==opOK,
2073 except for negative zeroes.
2076 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2078 roundingMode rounding_mode, bool *isExact) const
2082 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2085 if (fs == opInvalidOp) {
2086 unsigned int bits, dstPartsCount;
2088 dstPartsCount = partCountForBits(width);
2090 if (category == fcNaN)
// Number of value bits: one fewer than width for a signed target.
2095 bits = width - isSigned;
// Set the low bits to ones, then for a negative value in a signed target
// shift left by width - 1.
2097 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2098 if (sign && isSigned)
2099 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
// convertToInteger (APSInt overload): converts into a temporary word buffer
// sized from result.getNumWords(), using the bit width and signedness that
// result already has, then stores the converted bits back into result.
//   result        - in/out; supplies width and signedness, receives value.
//   rounding_mode - forwarded to the raw parts overload.
//   isExact       - forwarded output flag.
// NOTE(review): the final return of status is not visible in this excerpt.
2105 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2106 an APSInt, whose initial bit-width and signed-ness are used to determine the
2107 precision of the conversion.
2110 APFloat::convertToInteger(APSInt &result,
2111 roundingMode rounding_mode, bool *isExact) const
2113 unsigned bitWidth = result.getBitWidth();
2114 SmallVector<uint64_t, 4> parts(result.getNumWords());
2115 opStatus status = convertToInteger(
2116 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2117 // Keeps the original signed-ness.
2118 result = APInt(bitWidth, parts);
// Parameters of convertFromUnsignedParts:
//   src, srcCount - the unsigned integer as an array of srcCount parts.
//   rounding_mode - rounding applied by the final normalize call.
// The value is marked fcNormal, the top bits of src are copied into the
// significand, and the result of normalize is returned.
// NOTE(review): the declaration of dst, the else header and one
// continuation line are not visible in this excerpt.
2122 /* Convert an unsigned integer SRC to a floating point number,
2123 rounding according to ROUNDING_MODE. The sign of the floating
2124 point number is not modified. */
2126 APFloat::convertFromUnsignedParts(const integerPart *src,
2127 unsigned int srcCount,
2128 roundingMode rounding_mode)
2130 unsigned int omsb, precision, dstCount;
2132 lostFraction lost_fraction;
2134 assertArithmeticOK(*semantics);
2135 category = fcNormal;
// omsb is the one-based position of the highest set bit of src.
2136 omsb = APInt::tcMSB(src, srcCount) + 1;
2137 dst = significandParts();
2138 dstCount = partCount();
2139 precision = semantics->precision;
2141 /* We want the most significant PRECISION bits of SRC. There may not
2142 be that many; extract what we can. */
2143 if (precision <= omsb) {
// src is at least as wide as the significand: keep the top precision
// bits and record what was cut off below them.
2144 exponent = omsb - 1;
2145 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2147 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
// src fits entirely: copy all omsb bits, nothing is lost.
2149 exponent = precision - 1;
2150 lost_fraction = lfExactlyZero;
2151 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2154 return normalize(rounding_mode, lost_fraction);
// convertFromAPInt: converts the integer Val to this floating point format
// under rounding_mode and returns the status of convertFromUnsignedParts.
//   Val      - source integer; its word count sizes the conversion.
//   isSigned - used below although its declaration in the parameter list
//              is not visible in this excerpt.
// NOTE(review): the declaration of the local api and the body of the
// negative branch are not visible here; presumably api is a copy of Val
// that is negated for negative signed input - TODO confirm.
2158 APFloat::convertFromAPInt(const APInt &Val,
2160 roundingMode rounding_mode)
2162 unsigned int partCount = Val.getNumWords();
2166 if (isSigned && api.isNegative()) {
2171 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
// Parameters of convertFromSignExtendedInteger:
//   src, srcCount - the integer as an array of srcCount parts.
//   rounding_mode - forwarded to convertFromUnsignedParts.
// When the top bit of src is set (the first half of that condition is not
// visible in this excerpt), a negated heap copy is converted; otherwise
// src is converted directly.
// NOTE(review): the lines that set the sign, release copy and return
// status are not visible here.
2174 /* Convert a two's complement integer SRC to a floating point number,
2175 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2176 integer is signed, in which case it must be sign-extended. */
2178 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2179 unsigned int srcCount,
2181 roundingMode rounding_mode)
2185 assertArithmeticOK(*semantics);
// Tests the most significant bit of the whole part array.
2187 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2190 /* If we're signed and negative negate a copy. */
2192 copy = new integerPart[srcCount];
2193 APInt::tcAssign(copy, src, srcCount);
2194 APInt::tcNegate(copy, srcCount);
2195 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2199 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
// convertFromZeroExtendedInteger: converts a width-bit integer stored in
// parts to this floating point format under rounding_mode.
//   parts    - source words; partCountForBits(width) of them are read.
//   width    - number of significant bits in parts.
//   isSigned - when true, bit width - 1 is treated as the sign bit.
// The words are wrapped in an APInt and handed to convertFromUnsignedParts.
// NOTE(review): the body of the negative branch is not visible in this
// excerpt.
2205 /* FIXME: should this just take a const APInt reference? */
2207 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2208 unsigned int width, bool isSigned,
2209 roundingMode rounding_mode)
2211 unsigned int partCount = partCountForBits(width);
2212 APInt api = APInt(width, makeArrayRef(parts, partCount));
2215 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2220 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
// convertFromHexadecimalString: parses the hexadecimal significand and
// binary exponent in s into this value and rounds with rounding_mode.
//   s             - the digits; the caller convertFromString passes the
//                   text after the 0x prefix.
//   rounding_mode - rounding applied by the final normalize call.
// Digits are written nibble by nibble from the top of the significand;
// digits that do not fit are summarized as a lost fraction.
// NOTE(review): loop headers, braces and several short statements are not
// visible in this excerpt; malformed input is rejected with assert only.
2224 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2226 lostFraction lost_fraction = lfExactlyZero;
2227 integerPart *significand;
2228 unsigned int bitPos, partsCount;
2229 StringRef::iterator dot, firstSignificantDigit;
2233 category = fcNormal;
2235 significand = significandParts();
2236 partsCount = partCount();
// Start writing at the most significant end of the part array.
2237 bitPos = partsCount * integerPartWidth;
2239 /* Skip leading zeroes and any (hexa)decimal point. */
2240 StringRef::iterator begin = s.begin();
2241 StringRef::iterator end = s.end();
2242 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2243 firstSignificantDigit = p;
2246 integerPart hex_value;
2249 assert(dot == end && "String contains multiple dots");
// hexDigitValue yields -1U for a character that is not a hex digit.
2256 hex_value = hexDigitValue(*p);
2257 if (hex_value == -1U) {
2266 /* Store the number whilst 4-bit nibbles remain. */
2269 hex_value <<= bitPos % integerPartWidth;
2270 significand[bitPos / integerPartWidth] |= hex_value;
// No room left: fold the remaining digits into the lost fraction and
// skip over them.
2272 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2273 while (p != end && hexDigitValue(*p) != -1U)
2280 /* Hex floats require an exponent but not a hexadecimal point. */
2281 assert(p != end && "Hex strings require an exponent");
2282 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2283 assert(p != begin && "Significand has no digits");
2284 assert((dot == end || p - begin != 1) && "Significand has no digits");
2286 /* Ignore the exponent if we are zero. */
2287 if (p != firstSignificantDigit) {
2290 /* Implicit hexadecimal point? */
2294 /* Calculate the exponent adjustment implicit in the number of
2295 significant digits. */
2296 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2297 if (expAdjustment < 0)
// Each hex digit before the point contributes four binary places.
2299 expAdjustment = expAdjustment * 4 - 1;
2301 /* Adjust for writing the significand starting at the most
2302 significant nibble. */
2303 expAdjustment += semantics->precision;
2304 expAdjustment -= partsCount * integerPartWidth;
2306 /* Adjust for the given exponent. */
// p points at the exponent marker, so parsing starts one past it.
2307 exponent = totalExponent(p + 1, end, expAdjustment);
2310 return normalize(rounding_mode, lost_fraction);
// roundSignificandWithExponent: sets this value to decSig * 10^exp, rounded
// with rounding_mode, where decSig is the unsigned integer held in
// decSigParts.
//   decSigParts, sigPartCount - decimal significand as an integer.
//   exp                       - decimal exponent; may be negative.
//   rounding_mode             - rounding for the final normalize call.
// The product or quotient with 5^|exp| is computed in a wider scratch
// format; the loop doubles the scratch width (parts *= 2) each time round
// until the accumulated error bound no longer reaches a rounding boundary.
// NOTE(review): the branch headers choosing multiply or divide, some
// continuation lines and closing braces are not visible in this excerpt.
2314 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2315 unsigned sigPartCount, int exp,
2316 roundingMode rounding_mode)
2318 unsigned int parts, pow5PartCount;
// Scratch format; its precision is filled in on every loop iteration.
2319 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2320 integerPart pow5Parts[maxPowerOfFiveParts];
2323 isNearest = (rounding_mode == rmNearestTiesToEven ||
2324 rounding_mode == rmNearestTiesToAway);
// Initial scratch width: the target precision plus 11 bits.
2326 parts = partCountForBits(semantics->precision + 11);
2328 /* Calculate pow(5, abs(exp)). */
2329 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2331 for (;; parts *= 2) {
2332 opStatus sigStatus, powStatus;
2333 unsigned int excessPrecision, truncatedBits;
2335 calcSemantics.precision = parts * integerPartWidth - 1;
2336 excessPrecision = calcSemantics.precision - semantics->precision;
2337 truncatedBits = excessPrecision;
2339 APFloat decSig(calcSemantics, fcZero, sign);
2340 APFloat pow5(calcSemantics, fcZero, false);
2342 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2343 rmNearestTiesToEven);
2344 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2345 rmNearestTiesToEven);
2346 /* Add exp, as 10^n = 5^n * 2^n. */
2347 decSig.exponent += exp;
2349 lostFraction calcLostFraction;
2350 integerPart HUerr, HUdistance;
2351 unsigned int powHUerr;
2354 /* multiplySignificand leaves the precision-th bit set to 1. */
2355 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
// One half-ulp of error when the power of five was itself inexact.
2356 powHUerr = powStatus != opOK;
2358 calcLostFraction = decSig.divideSignificand(pow5);
2359 /* Denormal numbers have less precision. */
2360 if (decSig.exponent < semantics->minExponent) {
2361 excessPrecision += (semantics->minExponent - decSig.exponent);
2362 truncatedBits = excessPrecision;
2363 if (excessPrecision > calcSemantics.precision)
2364 excessPrecision = calcSemantics.precision;
2366 /* Extra half-ulp lost in reciprocal of exponent. */
2367 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2370 /* Both multiplySignificand and divideSignificand return the
2371 result with the integer bit set. */
2372 assert(APInt::tcExtractBit
2373 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
// Compare the error bound with the distance to the nearest rounding
// boundary, both measured in half-ulps.
2375 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2377 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2378 excessPrecision, isNearest);
2380 /* Are we guaranteed to round correctly if we truncate? */
2381 if (HUdistance >= HUerr) {
2382 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2383 calcSemantics.precision - excessPrecision,
2385 /* Take the exponent of decSig. If we tcExtract-ed less bits
2386 above we must adjust our exponent to compensate for the
2387 implicit right shift. */
2388 exponent = (decSig.exponent + semantics->precision
2389 - (calcSemantics.precision - excessPrecision));
2390 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2393 return normalize(rounding_mode, calcLostFraction);
// convertFromDecimalString: parses the decimal literal in str into this
// value and rounds with rounding_mode.
//   str           - the text to parse; the caller convertFromString passes
//                   the text after any sign character.
//   rounding_mode - rounding used for overflow, underflow and the final
//                   result.
// interpretDecimal scans the text into D. Quick exits handle a literal
// with no significant digits and exponents that certainly overflow or
// underflow; otherwise the digits are accumulated into a heap buffer and
// passed to roundSignificandWithExponent.
// NOTE(review): several short lines (declarations of fs and D, the bodies
// of the quick exits, loop headers, the final return) are not visible in
// this excerpt.
2399 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2404 /* Scan the text. */
2405 StringRef::iterator p = str.begin();
2406 interpretDecimal(p, str.end(), &D);
2408 /* Handle the quick cases. First the case of no significant digits,
2409 i.e. zero, and then exponents that are obviously too large or too
2410 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2411 definitely overflows if
2413 (exp - 1) * L >= maxExponent
2415 and definitely underflows to zero where
2417 (exp + 1) * L <= minExponent - precision
2419 With integer arithmetic the tightest bounds for L are
2421 93/28 < L < 196/59 [ numerator <= 256 ]
2422 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
// A StringRef is not guaranteed to be NUL-terminated, so firstSigDigit
// must be compared with the end iterator before it is dereferenced. An
// all-zero literal leaves it at the end of the string.
2425 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2429 /* Check whether the normalized exponent is high enough to overflow
2430 max during the log-rebasing in the max-exponent check below. */
2431 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2432 fs = handleOverflow(rounding_mode);
2434 /* If it wasn't, then it also wasn't high enough to overflow max
2435 during the log-rebasing in the min-exponent check. Check that it
2436 won't overflow min in either check, then perform the min-exponent
2438 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2439 (D.normalizedExponent + 1) * 28738 <=
2440 8651 * (semantics->minExponent - (int) semantics->precision)) {
2441 /* Underflow to zero and round. */
2443 fs = normalize(rounding_mode, lfLessThanHalf);
2445 /* We can finally safely perform the max-exponent check. */
2446 } else if ((D.normalizedExponent - 1) * 42039
2447 >= 12655 * semantics->maxExponent) {
2448 /* Overflow and round. */
2449 fs = handleOverflow(rounding_mode);
2451 integerPart *decSignificand;
2452 unsigned int partCount;
2454 /* A tight upper bound on number of bits required to hold an
2455 N-digit decimal integer is N * 196 / 59. Allocate enough space
2456 to hold the full significand, and an extra part required by
// partCount first holds the length of the digit span, then the number of
// parts needed for that many digits.
2458 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2459 partCount = partCountForBits(1 + 196 * partCount / 59);
2460 decSignificand = new integerPart[partCount + 1];
2463 /* Convert to binary efficiently - we do almost all multiplication
2464 in an integerPart. When this would overflow do we do a single
2465 bignum multiplication, and then revert again to multiplication
2466 in an integerPart. */
2468 integerPart decValue, val, multiplier;
2476 if (p == str.end()) {
2480 decValue = decDigitValue(*p++);
2481 assert(decValue < 10U && "Invalid character in significand");
2483 val = val * 10 + decValue;
2484 /* The maximum number that can be multiplied by ten with any
2485 digit added without overflowing an integerPart. */
2486 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2488 /* Multiply out the current part. */
2489 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2490 partCount, partCount + 1, false);
2492 /* If we used another part (likely but not guaranteed), increase
2494 if (decSignificand[partCount])
2496 } while (p <= D.lastSigDigit);
2498 category = fcNormal;
2499 fs = roundSignificandWithExponent(decSignificand, partCount,
2500 D.exponent, rounding_mode);
2502 delete [] decSignificand;
// convertFromString: parses str into this value, rounding with
// rounding_mode. A leading minus sets the sign member; text that begins
// with 0x or 0X is passed on without that prefix to
// convertFromHexadecimalString, anything else goes to
// convertFromDecimalString.
//   str           - non-empty text (asserted).
//   rounding_mode - forwarded to the selected parser.
// NOTE(review): the lines that step past the sign character and shorten
// slen, and the continuation of the hexadecimal call, are not visible in
// this excerpt.
2509 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2511 assertArithmeticOK(*semantics);
2512 assert(!str.empty() && "Invalid string length");
2514 /* Handle a leading minus sign. */
2515 StringRef::iterator p = str.begin();
2516 size_t slen = str.size();
2517 sign = *p == '-' ? 1 : 0;
2518 if (*p == '-' || *p == '+') {
2521 assert(slen && "String has no digits");
2524 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2525 assert(slen - 2 && "Invalid string");
2526 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2530 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
// Parameters of convertToHexString:
//   dst           - output buffer; the caller must make it large enough.
//   hexDigits     - number of hex digits requested, 0 for the minimum.
//   upperCase     - selects upper or lower case spellings and markers.
//   rounding_mode - forwarded to convertNormalToHexString.
// Returns the distance between the final write position and p; p is
// declared on a line not visible in this excerpt, presumably as the
// initial value of dst - TODO confirm.
// NOTE(review): the switch over the category and several short statements
// are not visible here; comments describe visible lines only.
2533 /* Write out a hexadecimal representation of the floating point value
2534 to DST, which must be of sufficient size, in the C99 form
2535 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2536 excluding the terminating NUL.
2538 If UPPERCASE, the output is in upper case, otherwise in lower case.
2540 HEXDIGITS digits appear altogether, rounding the value if
2541 necessary. If HEXDIGITS is 0, the minimal precision to display the
2542 number precisely is used instead. If nothing would appear after
2543 the decimal point it is suppressed.
2545 The decimal exponent is always printed and has at least one digit.
2546 Zero values display an exponent of zero. Infinities and NaNs
2547 appear as "infinity" or "nan" respectively.
2549 The above rules are as specified by C99. There is ambiguity about
2550 what the leading hexadecimal digit should be. This implementation
2551 uses whatever is necessary so that the exponent is displayed as
2552 stored. This implies the exponent will fall within the IEEE format
2553 range, and the leading hexadecimal digit will be 0 (for denormals),
2554 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2555 any other digits zero).
2558 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2559 bool upperCase, roundingMode rounding_mode) const
2563 assertArithmeticOK(*semantics);
// Infinity: copy the spelled-out name without its terminating NUL.
2571 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2572 dst += sizeof infinityL - 1;
// NaN: same scheme with the NaN spelling.
2576 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2577 dst += sizeof NaNU - 1;
2582 *dst++ = upperCase ? 'X': 'x';
// Pad with hexDigits - 1 zero digits when more than one digit is wanted.
2584 if (hexDigits > 1) {
2586 memset (dst, '0', hexDigits - 1);
2587 dst += hexDigits - 1;
2589 *dst++ = upperCase ? 'P': 'p';
// Normal values are formatted by the dedicated helper.
2594 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2600 return static_cast<unsigned int>(dst - p);
2603 /* Does the hard work of outputting the correctly rounded hexadecimal
2604 form of a normal floating point number with the specified number of
2605 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2606 digits necessary to print the value precisely is output. */
// Writes into dst and returns the pointer yielded by writeSignedDecimal once
// the exponent marker and the exponent have been appended.
2608 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2610 roundingMode rounding_mode) const
2612 unsigned int count, valueBits, shift, partsCount, outputDigits;
2613 const char *hexDigitChars;
2614 const integerPart *significand;
2619 *dst++ = upperCase ? 'X': 'x';
// Select the digit table matching the requested letter case.
2622 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2624 significand = significandParts();
2625 partsCount = partCount();
2627 /* +3 because the first digit only uses the single integer bit, so
2628 we have 3 virtual zero most-significant-bits. */
2629 valueBits = semantics->precision + 3;
// shift is the left shift applied to each part below; the bits shifted out
// are replaced by the top bits of the next lower part.
2630 shift = integerPartWidth - valueBits % integerPartWidth;
2632 /* The natural number of digits required ignoring trailing
2633 insignificant zeroes. */
2634 outputDigits = (valueBits - significandLSB () + 3) / 4;
2636 /* hexDigits of zero means use the required number for the
2637 precision. Otherwise, see if we are truncating. If we are,
2638 find out if we need to round away from zero. */
2640 if (hexDigits < outputDigits) {
2641 /* We are dropping non-zero bits, so need to check how to round.
2642 "bits" is the number of dropped bits. */
2644 lostFraction fraction;
2646 bits = valueBits - hexDigits * 4;
2647 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2648 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2650 outputDigits = hexDigits;
2653 /* Write the digits consecutively, and start writing in the location
2654 of the hexadecimal point. We move the most significant digit
2655 left and add the hexadecimal point later. */
// Number of integerParts needed to hold valueBits bits, rounded up.
2658 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2660 while (outputDigits && count) {
2663 /* Put the most significant integerPartWidth bits in "part". */
2664 if (--count == partsCount)
2665 part = 0; /* An imaginary higher zero part. */
2667 part = significand[count] << shift;
2670 part |= significand[count - 1] >> (integerPartWidth - shift);
2672 /* Convert as much of "part" to hexdigits as we can. */
2673 unsigned int curDigits = integerPartWidth / 4;
2675 if (curDigits > outputDigits)
2676 curDigits = outputDigits;
2677 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2678 outputDigits -= curDigits;
2684 /* Note that hexDigitChars has a trailing '0'. */
// Replace the digit at q with the table entry one past its value; the loop
// repeats while the replaced digit reads as the zero digit.
// NOTE(review): how q moves between iterations is not visible here.
2687 *q = hexDigitChars[hexDigitValue (*q) + 1];
2688 } while (*q == '0');
2691 /* Add trailing zeroes. */
2692 memset (dst, '0', outputDigits);
2693 dst += outputDigits;
2696 /* Move the most significant digit to before the point, and if there
2697 is something after the decimal point add it. This must come
2698 after rounding above. */
2705 /* Finally output the exponent. */
2706 *dst++ = upperCase ? 'P': 'p';
2708 return writeSignedDecimal (dst, exponent);
// Computes a hash_code for Arg. Values whose category is not fcNormal hash
// the category, the sign (forced to zero for NaN) and the precision; fcNormal
// values also mix in the exponent and the range of significand parts
// [significandParts(), significandParts() + partCount()).
2711 hash_code llvm::hash_value(const APFloat &Arg) {
2712 if (Arg.category != APFloat::fcNormal)
2713 return hash_combine((uint8_t)Arg.category,
2714 // NaN has no sign, fix it at zero.
2715 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2716 Arg.semantics->precision);
2718 // Normal floats need their exponent and significand hashed.
2719 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2720 Arg.semantics->precision, Arg.exponent,
2722 Arg.significandParts(),
2723 Arg.significandParts() + Arg.partCount()));
2726 // Conversion from APFloat to/from host float/double. It may eventually be
2727 // possible to eliminate these and have everybody deal with APFloats, but that
2728 // will take a while. This approach will not easily extend to long double.
2729 // Current implementation requires integerPartWidth==64, which is correct at
2730 // the moment but could be made more general.
2732 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2733 // the actual IEEE representations. We compensate for that here.
// Packs an x87DoubleExtended value into an 80-bit APInt. words[0] carries the
// 64-bit significand taken from part 0; words[1] carries the sign at bit 15
// above the low 15 bits of the biased exponent.
// NOTE(review): the values assigned in the fcZero branch are not visible here.
2736 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2738 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2739 assert(partCount()==2);
2741 uint64_t myexponent, mysignificand;
2743 if (category==fcNormal) {
2744 myexponent = exponent+16383; //bias
2745 mysignificand = significandParts()[0];
// A biased exponent of 1 with the top significand bit clear is stored as 0.
2746 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2747 myexponent = 0; // denormal
2748 } else if (category==fcZero) {
2751 } else if (category==fcInfinity) {
// Infinity: all-ones exponent field with only the top significand bit set.
2752 myexponent = 0x7fff;
2753 mysignificand = 0x8000000000000000ULL;
2755 assert(category == fcNaN && "Unknown category");
// NaN: all-ones exponent field; the stored significand is passed through.
2756 myexponent = 0x7fff;
2757 mysignificand = significandParts()[0];
2761 words[0] = mysignificand;
2762 words[1] = ((uint64_t)(sign & 1) << 15) |
2763 (myexponent & 0x7fffLL);
2764 return APInt(80, words);
// Packs a PPCDoubleDouble value into a 128-bit APInt made of two 64-bit
// words. Each word is laid out as sign at bit 63, an 11-bit exponent field at
// bits 52-62 and the low 52 significand bits; words[0] uses sign, exponent
// and part 0, words[1] uses sign2, exponent2 and part 1.
// NOTE(review): the values assigned in the fcZero and fcInfinity branches,
// and the first exponent in the NaN branch, are not visible here.
2768 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2770 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2771 assert(partCount()==2);
2773 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2775 if (category==fcNormal) {
// Both halves are biased by 1023.
2776 myexponent = exponent + 1023; //bias
2777 myexponent2 = exponent2 + 1023;
2778 mysignificand = significandParts()[0];
2779 mysignificand2 = significandParts()[1];
// For each half, a biased exponent of 1 with bit 52 clear is stored as 0.
2780 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2781 myexponent = 0; // denormal
2782 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2783 myexponent2 = 0; // denormal
2784 } else if (category==fcZero) {
2789 } else if (category==fcInfinity) {
2795 assert(category == fcNaN && "Unknown category");
2797 mysignificand = significandParts()[0];
// In the NaN case exponent2 is copied without adding the bias.
2798 myexponent2 = exponent2;
2799 mysignificand2 = significandParts()[1];
2803 words[0] = ((uint64_t)(sign & 1) << 63) |
2804 ((myexponent & 0x7ff) << 52) |
2805 (mysignificand & 0xfffffffffffffLL);
2806 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2807 ((myexponent2 & 0x7ff) << 52) |
2808 (mysignificand2 & 0xfffffffffffffLL);
2809 return APInt(128, words);
// Packs an IEEEquad value into a 128-bit APInt. words[0] is significand part
// 0; words[1] holds the sign at bit 63, a 15-bit exponent field at bits 48-62
// and the low 48 bits of significand part 1.
// NOTE(review): the exponent assigned in the fcZero branch is not visible
// here.
2813 APFloat::convertQuadrupleAPFloatToAPInt() const
2815 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2816 assert(partCount()==2);
2818 uint64_t myexponent, mysignificand, mysignificand2;
2820 if (category==fcNormal) {
2821 myexponent = exponent+16383; //bias
2822 mysignificand = significandParts()[0];
2823 mysignificand2 = significandParts()[1];
// A biased exponent of 1 with bit 48 of the upper part clear is stored as 0.
2824 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2825 myexponent = 0; // denormal
2826 } else if (category==fcZero) {
2828 mysignificand = mysignificand2 = 0;
2829 } else if (category==fcInfinity) {
// Infinity: all-ones exponent field with a zero significand.
2830 myexponent = 0x7fff;
2831 mysignificand = mysignificand2 = 0;
2833 assert(category == fcNaN && "Unknown category!");
// NaN: all-ones exponent field; both significand parts are passed through.
2834 myexponent = 0x7fff;
2835 mysignificand = significandParts()[0];
2836 mysignificand2 = significandParts()[1];
2840 words[0] = mysignificand;
2841 words[1] = ((uint64_t)(sign & 1) << 63) |
2842 ((myexponent & 0x7fff) << 48) |
2843 (mysignificand2 & 0xffffffffffffLL);
2845 return APInt(128, words);
// Packs an IEEEdouble value into a 64-bit APInt: sign at bit 63, an 11-bit
// exponent field at bits 52-62 and the low 52 significand bits.
// NOTE(review): the values assigned in the fcZero and fcInfinity branches,
// and the exponent in the NaN branch, are not visible here.
2849 APFloat::convertDoubleAPFloatToAPInt() const
2851 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2852 assert(partCount()==1);
2854 uint64_t myexponent, mysignificand;
2856 if (category==fcNormal) {
2857 myexponent = exponent+1023; //bias
2858 mysignificand = *significandParts();
// A biased exponent of 1 with bit 52 clear is stored as 0.
2859 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2860 myexponent = 0; // denormal
2861 } else if (category==fcZero) {
2864 } else if (category==fcInfinity) {
2868 assert(category == fcNaN && "Unknown category!");
2870 mysignificand = *significandParts();
2873 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2874 ((myexponent & 0x7ff) << 52) |
2875 (mysignificand & 0xfffffffffffffLL))));
// Packs an IEEEsingle value into a 32-bit APInt: sign at bit 31, an 8-bit
// exponent field at bits 23-30 and the low 23 significand bits.
// NOTE(review): the values assigned in the fcZero and fcInfinity branches,
// and the exponent in the NaN branch, are not visible here.
2879 APFloat::convertFloatAPFloatToAPInt() const
2881 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2882 assert(partCount()==1);
2884 uint32_t myexponent, mysignificand;
2886 if (category==fcNormal) {
2887 myexponent = exponent+127; //bias
2888 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with bit 23 clear is stored as 0.
2889 if (myexponent == 1 && !(mysignificand & 0x800000))
2890 myexponent = 0; // denormal
2891 } else if (category==fcZero) {
2894 } else if (category==fcInfinity) {
2898 assert(category == fcNaN && "Unknown category!");
2900 mysignificand = (uint32_t)*significandParts();
2903 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2904 (mysignificand & 0x7fffff)));
// Packs an IEEEhalf value into a 16-bit APInt: sign at bit 15, a 5-bit
// exponent field at bits 10-14 and the low 10 significand bits.
// NOTE(review): the values assigned in the fcZero and fcInfinity branches,
// and the exponent in the NaN branch, are not visible here.
2908 APFloat::convertHalfAPFloatToAPInt() const
2910 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2911 assert(partCount()==1);
2913 uint32_t myexponent, mysignificand;
2915 if (category==fcNormal) {
2916 myexponent = exponent+15; //bias
2917 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with bit 10 clear is stored as 0.
2918 if (myexponent == 1 && !(mysignificand & 0x400))
2919 myexponent = 0; // denormal
2920 } else if (category==fcZero) {
2923 } else if (category==fcInfinity) {
2927 assert(category == fcNaN && "Unknown category!");
2929 mysignificand = (uint32_t)*significandParts();
2932 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2933 (mysignificand & 0x3ff)));
2936 // This function creates an APInt that is just a bit map of the floating
2937 // point constant as it would appear in memory. It is not a conversion,
2938 // and treating the result as a normal integer is unlikely to be useful.
// The semantics pointer is compared against each known format in turn and the
// matching packing routine is called. The last case is not tested with an if:
// it asserts x87DoubleExtended and then packs as an 80-bit value.
2941 APFloat::bitcastToAPInt() const
2943 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2944 return convertHalfAPFloatToAPInt();
2946 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2947 return convertFloatAPFloatToAPInt();
2949 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2950 return convertDoubleAPFloatToAPInt();
2952 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2953 return convertQuadrupleAPFloatToAPInt();
2955 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2956 return convertPPCDoubleDoubleAPFloatToAPInt();
2958 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2960 return convertF80LongDoubleAPFloatToAPInt();
// Returns this value as a host float. The semantics must be IEEEsingle
// (asserted); the result is the bit pattern from bitcastToAPInt() passed
// through APInt::bitsToFloat().
2964 APFloat::convertToFloat() const
2966 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2967 "Float semantics are not IEEEsingle");
2968 APInt api = bitcastToAPInt();
2969 return api.bitsToFloat();
// Returns this value as a host double. The semantics must be IEEEdouble
// (asserted); the result is the bit pattern from bitcastToAPInt() passed
// through APInt::bitsToDouble().
2973 APFloat::convertToDouble() const
2975 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2976 "Float semantics are not IEEEdouble");
2977 APInt api = bitcastToAPInt();
2978 return api.bitsToDouble();
2981 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2982 /// does not support these bit patterns:
2983 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2984 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2985 /// exponent = 0, integer bit 1 ("pseudodenormal")
2986 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2987 /// At the moment, the first two are treated as NaNs, the second two as Normal.
// Initializes this object as x87DoubleExtended from an 80-bit APInt. Raw word
// 0 is the 64-bit significand; the low 15 bits of raw word 1 are the biased
// exponent and the sign is taken from word 1 shifted right by 15.
// NOTE(review): the category assignments of the first and third branches and
// the statement guarded by the denormal test are not visible here.
2989 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2991 assert(api.getBitWidth()==80);
2992 uint64_t i1 = api.getRawData()[0];
2993 uint64_t i2 = api.getRawData()[1];
2994 uint64_t myexponent = (i2 & 0x7fff);
2995 uint64_t mysignificand = i1;
2997 initialize(&APFloat::x87DoubleExtended);
2998 assert(partCount()==2);
3000 sign = static_cast<unsigned int>(i2>>15);
3001 if (myexponent==0 && mysignificand==0) {
3002 // exponent, significand meaningless
3004 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3005 // exponent, significand meaningless
3006 category = fcInfinity;
3007 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3008 // exponent meaningless
3010 significandParts()[0] = mysignificand;
3011 significandParts()[1] = 0;
3013 category = fcNormal;
// Remove the 16383 bias; the significand is stored as given because the
// integer bit is explicit in this format.
3014 exponent = myexponent - 16383;
3015 significandParts()[0] = mysignificand;
3016 significandParts()[1] = 0;
3017 if (myexponent==0) // denormal
// Initializes this object as PPCDoubleDouble from a 128-bit APInt. Each raw
// 64-bit word is split into sign (bit 63), an 11-bit exponent field (bits
// 52-62) and a 52-bit fraction; word 0 feeds sign, exponent and part 0, word
// 1 feeds sign2, exponent2 and part 1. Classification uses word 0 only.
// NOTE(review): the category assignments of the first and third branches and
// the branch structure around the two integer-bit statements are not visible
// here.
3023 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3025 assert(api.getBitWidth()==128);
3026 uint64_t i1 = api.getRawData()[0];
3027 uint64_t i2 = api.getRawData()[1];
3028 uint64_t myexponent = (i1 >> 52) & 0x7ff;
3029 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
3030 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
3031 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
3033 initialize(&APFloat::PPCDoubleDouble);
3034 assert(partCount()==2);
3036 sign = static_cast<unsigned int>(i1>>63);
3037 sign2 = static_cast<unsigned int>(i2>>63);
3038 if (myexponent==0 && mysignificand==0) {
3039 // exponent, significand meaningless
3040 // exponent2 and significand2 are required to be 0; we don't check
3042 } else if (myexponent==0x7ff && mysignificand==0) {
3043 // exponent, significand meaningless
3044 // exponent2 and significand2 are required to be 0; we don't check
3045 category = fcInfinity;
3046 } else if (myexponent==0x7ff && mysignificand!=0) {
3047 // exponent meaningless. So is the whole second word, but keep it
// exponent2 is stored as the raw field value here, without removing the bias.
3050 exponent2 = myexponent2;
3051 significandParts()[0] = mysignificand;
3052 significandParts()[1] = mysignificand2;
3054 category = fcNormal;
3055 // Note there is no category2; the second word is treated as if it is
3056 // fcNormal, although it might be something else considered by itself.
3057 exponent = myexponent - 1023;
3058 exponent2 = myexponent2 - 1023;
3059 significandParts()[0] = mysignificand;
3060 significandParts()[1] = mysignificand2;
3061 if (myexponent==0) // denormal
3064 significandParts()[0] |= 0x10000000000000LL; // integer bit
3068 significandParts()[1] |= 0x10000000000000LL; // integer bit
// Initializes this object as IEEEquad from a 128-bit APInt. Raw word 0 is the
// low 64 fraction bits; raw word 1 supplies the sign (bit 63), the 15-bit
// exponent field (bits 48-62) and the upper 48 fraction bits.
// NOTE(review): the category assignments of the first and third branches and
// the branch structure around the integer-bit statement are not visible here.
3073 APFloat::initFromQuadrupleAPInt(const APInt &api)
3075 assert(api.getBitWidth()==128);
3076 uint64_t i1 = api.getRawData()[0];
3077 uint64_t i2 = api.getRawData()[1];
3078 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3079 uint64_t mysignificand = i1;
3080 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3082 initialize(&APFloat::IEEEquad);
3083 assert(partCount()==2);
3085 sign = static_cast<unsigned int>(i2>>63);
3086 if (myexponent==0 &&
3087 (mysignificand==0 && mysignificand2==0)) {
3088 // exponent, significand meaningless
3090 } else if (myexponent==0x7fff &&
3091 (mysignificand==0 && mysignificand2==0)) {
3092 // exponent, significand meaningless
3093 category = fcInfinity;
3094 } else if (myexponent==0x7fff &&
3095 (mysignificand!=0 || mysignificand2 !=0)) {
3096 // exponent meaningless
3098 significandParts()[0] = mysignificand;
3099 significandParts()[1] = mysignificand2;
3101 category = fcNormal;
// Remove the 16383 bias.
3102 exponent = myexponent - 16383;
3103 significandParts()[0] = mysignificand;
3104 significandParts()[1] = mysignificand2;
3105 if (myexponent==0) // denormal
3108 significandParts()[1] |= 0x1000000000000LL; // integer bit
// Initializes this object as IEEEdouble from a 64-bit APInt: sign from bit
// 63, exponent field from bits 52-62, fraction from the low 52 bits.
// NOTE(review): the category assignments of the first and third branches and
// the branch structure around the integer-bit statement are not visible here.
3113 APFloat::initFromDoubleAPInt(const APInt &api)
3115 assert(api.getBitWidth()==64);
3116 uint64_t i = *api.getRawData();
3117 uint64_t myexponent = (i >> 52) & 0x7ff;
3118 uint64_t mysignificand = i & 0xfffffffffffffLL;
3120 initialize(&APFloat::IEEEdouble);
3121 assert(partCount()==1);
3123 sign = static_cast<unsigned int>(i>>63);
3124 if (myexponent==0 && mysignificand==0) {
3125 // exponent, significand meaningless
3127 } else if (myexponent==0x7ff && mysignificand==0) {
3128 // exponent, significand meaningless
3129 category = fcInfinity;
3130 } else if (myexponent==0x7ff && mysignificand!=0) {
3131 // exponent meaningless
3133 *significandParts() = mysignificand;
3135 category = fcNormal;
// Remove the 1023 bias.
3136 exponent = myexponent - 1023;
3137 *significandParts() = mysignificand;
3138 if (myexponent==0) // denormal
3141 *significandParts() |= 0x10000000000000LL; // integer bit
// Initializes this object as IEEEsingle from a 32-bit APInt: exponent field
// from bits 23-30 and fraction from the low 23 bits of the raw word.
// NOTE(review): the sign assignment, the category assignments of the first
// and third branches, and the branch structure around the integer-bit
// statement are not visible here.
3146 APFloat::initFromFloatAPInt(const APInt & api)
3148 assert(api.getBitWidth()==32);
3149 uint32_t i = (uint32_t)*api.getRawData();
3150 uint32_t myexponent = (i >> 23) & 0xff;
3151 uint32_t mysignificand = i & 0x7fffff;
3153 initialize(&APFloat::IEEEsingle);
3154 assert(partCount()==1);
3157 if (myexponent==0 && mysignificand==0) {
3158 // exponent, significand meaningless
3160 } else if (myexponent==0xff && mysignificand==0) {
3161 // exponent, significand meaningless
3162 category = fcInfinity;
3163 } else if (myexponent==0xff && mysignificand!=0) {
3164 // sign, exponent, significand meaningless
3166 *significandParts() = mysignificand;
3168 category = fcNormal;
3169 exponent = myexponent - 127; //bias
3170 *significandParts() = mysignificand;
3171 if (myexponent==0) // denormal
3174 *significandParts() |= 0x800000; // integer bit
// Initializes this object as IEEEhalf from a 16-bit APInt: exponent field
// from bits 10-14 and fraction from the low 10 bits of the raw word.
// NOTE(review): the sign assignment, the category assignments of the first
// and third branches, and the branch structure around the integer-bit
// statement are not visible here.
3179 APFloat::initFromHalfAPInt(const APInt & api)
3181 assert(api.getBitWidth()==16);
3182 uint32_t i = (uint32_t)*api.getRawData();
3183 uint32_t myexponent = (i >> 10) & 0x1f;
3184 uint32_t mysignificand = i & 0x3ff;
3186 initialize(&APFloat::IEEEhalf);
3187 assert(partCount()==1);
3190 if (myexponent==0 && mysignificand==0) {
3191 // exponent, significand meaningless
3193 } else if (myexponent==0x1f && mysignificand==0) {
3194 // exponent, significand meaningless
3195 category = fcInfinity;
3196 } else if (myexponent==0x1f && mysignificand!=0) {
3197 // sign, exponent, significand meaningless
3199 *significandParts() = mysignificand;
3201 category = fcNormal;
3202 exponent = myexponent - 15; //bias
3203 *significandParts() = mysignificand;
3204 if (myexponent==0) // denormal
3207 *significandParts() |= 0x400; // integer bit
3211 /// Treat api as containing the bits of a floating point number. Currently
3212 /// we infer the floating point type from the size of the APInt. The
3213 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3214 /// when the size is anything else).
// Width dispatch: 16 -> half, 32 -> float, 64 -> double, 80 -> x87 extended,
// 128 -> quad or PPC double-double. Any other width reaches llvm_unreachable.
// NOTE(review): the condition choosing between the two 128-bit initializers
// is not visible here; presumably it is isIEEE, per the comment above.
3216 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3218 if (api.getBitWidth() == 16)
3219 return initFromHalfAPInt(api);
3220 else if (api.getBitWidth() == 32)
3221 return initFromFloatAPInt(api);
3222 else if (api.getBitWidth()==64)
3223 return initFromDoubleAPInt(api);
3224 else if (api.getBitWidth()==80)
3225 return initFromF80LongDoubleAPInt(api);
3226 else if (api.getBitWidth()==128)
3228 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3230 llvm_unreachable(0);
// Returns the APFloat constructed from an APInt of BitWidth bits with every
// bit set; isIEEE is forwarded to the APFloat(APInt, bool) constructor.
3234 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3236 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
// Builds an fcNormal value of semantics Sem with the requested sign, the
// exponent set to Sem.maxExponent, and every significand part filled with
// ones before the excess high bits are masked off.
// NOTE(review): the statement that applies the mask and the return statement
// are not visible here.
3239 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3240 APFloat Val(Sem, fcNormal, Negative);
3242 // We want (in interchange format):
3243 // sign = {Negative}
3245 // significand = 1..1
3247 Val.exponent = Sem.maxExponent; // unbiased
3249 // 1-initialize all bits....
3250 Val.zeroSignificand();
3251 integerPart *significand = Val.significandParts();
3252 unsigned N = partCountForBits(Sem.precision);
3253 for (unsigned i = 0; i != N; ++i)
3254 significand[i] = ~((integerPart) 0);
3256 // ...and then clear the top bits for internal consistency.
// Only needed when the precision is not a whole number of parts; the mask
// keeps the low (precision % integerPartWidth) bits.
3257 if (Sem.precision % integerPartWidth != 0)
3259 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
// Builds an fcNormal value of semantics Sem with the requested sign, the
// exponent set to Sem.minExponent, and a significand that is zero except for
// the value 1 in its lowest part.
// NOTE(review): the return statement is not visible here.
3264 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3265 APFloat Val(Sem, fcNormal, Negative);
3267 // We want (in interchange format):
3268 // sign = {Negative}
3270 // significand = 0..01
3272 Val.exponent = Sem.minExponent; // unbiased
3273 Val.zeroSignificand();
3274 Val.significandParts()[0] = 1;
// Builds an fcNormal value of semantics Sem with the requested sign, the
// exponent set to Sem.minExponent, and a zeroed significand in which only bit
// (precision - 1) is set, located in the highest part.
// NOTE(review): the return statement is not visible here.
3278 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3279 APFloat Val(Sem, fcNormal, Negative);
3281 // We want (in interchange format):
3282 // sign = {Negative}
3284 // significand = 10..0
3286 Val.exponent = Sem.minExponent;
3287 Val.zeroSignificand();
3288 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3289 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
// Constructs from a raw bit pattern: zero-initializes exponent2 and sign2,
// then lets initFromAPInt decode api (isIEEE is forwarded unchanged).
3294 APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
3295 initFromAPInt(api, isIEEE);
// Constructs from a host float: zero-initializes exponent2 and sign2, then
// decodes the bits produced by APInt::floatToBits(f) via initFromAPInt.
3298 APFloat::APFloat(float f) : exponent2(0), sign2(0) {
3299 initFromAPInt(APInt::floatToBits(f));
// Constructs from a host double: zero-initializes exponent2 and sign2, then
// decodes the bits produced by APInt::doubleToBits(d) via initFromAPInt.
3302 APFloat::APFloat(double d) : exponent2(0), sign2(0) {
3303 initFromAPInt(APInt::doubleToBits(d));
// Appends every character of Str, from begin() to end(), to Buffer.
3307 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3308 Buffer.append(Str.begin(), Str.end());
3311 /// Removes data from the given significand until it is no more
3312 /// precise than is required for the desired precision.
// significand and exp are updated in place: exp grows by the number of
// decimal digits divided out of significand. Nothing changes when the active
// bit count already fits or when no whole decimal digit can be removed.
3313 void AdjustToPrecision(APInt &significand,
3314 int &exp, unsigned FormatPrecision) {
3315 unsigned bits = significand.getActiveBits();
3317 // 196/59 is a very slight overestimate of lg_2(10).
3318 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3320 if (bits <= bitsRequired) return;
// Convert the surplus bits back into a whole count of decimal digits, using
// the inverse ratio 59/196 and rounding down.
3322 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3323 if (!tensRemovable) return;
3325 exp += tensRemovable;
3327 APInt divisor(significand.getBitWidth(), 1);
3328 APInt powten(significand.getBitWidth(), 10);
// NOTE(review): the loop walks the bits of tensRemovable, presumably building
// 10^tensRemovable in divisor by repeated squaring of powten; the multiply
// statements are not visible here.
3330 if (tensRemovable & 1)
3332 tensRemovable >>= 1;
3333 if (!tensRemovable) break;
3337 significand = significand.udiv(divisor);
3339 // Truncate the significand down to its active bit count, but
3340 // don't try to drop below 32.
3341 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3342 significand = significand.trunc(newPrecision);
// Shortens a buffer of decimal digit characters, stored least significant
// first, to at most FormatPrecision digits. The digit just below the kept
// range decides the rounding direction; exp is increased by the number of
// leading buffer entries that are erased. Does nothing when the buffer
// already has FormatPrecision digits or fewer.
3346 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3347 int &exp, unsigned FormatPrecision) {
3348 unsigned N = buffer.size();
3349 if (N <= FormatPrecision) return;
3351 // The most significant figures are the last ones in the buffer.
3352 unsigned FirstSignificant = N - FormatPrecision;
3355 // FIXME: this probably shouldn't use 'round half up'.
3357 // Rounding down is just a truncation, except we also want to drop
3358 // trailing zeros from the new result.
3359 if (buffer[FirstSignificant - 1] < '5') {
3360 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3363 exp += FirstSignificant;
3364 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3368 // Rounding up requires a decimal add-with-carry. If we continue
3369 // the carry, the newly-introduced zeros will just be truncated.
// NOTE(review): the statements executed for a nine digit and for any other
// digit are not visible here.
3370 for (unsigned I = FirstSignificant; I != N; ++I) {
3371 if (buffer[I] == '9') {
3379 // If we carried through, we have exactly one digit of precision.
3380 if (FirstSignificant == N) {
3381 exp += FirstSignificant;
3383 buffer.push_back('1');
3387 exp += FirstSignificant;
3388 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
// Appends a decimal rendering of this value to Str.
//   FormatPrecision  - maximum number of significant digits; zero selects a
//                      default derived from semantics->precision.
//   FormatMaxPadding - zero forces scientific notation; otherwise it bounds
//                      the padding tolerated before scientific notation is
//                      chosen.
// NOTE(review): the statement selecting between the special-value returns
// below is not visible here; presumably it switches on category and sign.
3392 void APFloat::toString(SmallVectorImpl<char> &Str,
3393 unsigned FormatPrecision,
3394 unsigned FormatMaxPadding) const {
3398 return append(Str, "-Inf");
3400 return append(Str, "+Inf");
3402 case fcNaN: return append(Str, "NaN");
3408 if (!FormatMaxPadding)
3409 append(Str, "0.0E+0");
3421 // Decompose the number into an APInt and an exponent.
3422 int exp = exponent - ((int) semantics->precision - 1);
3423 APInt significand(semantics->precision,
3424 makeArrayRef(significandParts(),
3425 partCountForBits(semantics->precision)));
3427 // Set FormatPrecision if zero. We want to do this before we
3428 // truncate trailing zeros, as those are part of the precision.
3429 if (!FormatPrecision) {
3430 // It's an interesting question whether to use the nominal
3431 // precision or the active precision here for denormals.
3433 // FormatPrecision = ceil(significandBits / lg_2(10))
3434 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3437 // Ignore trailing binary zeros.
3438 int trailingZeros = significand.countTrailingZeros();
3439 exp += trailingZeros;
3440 significand = significand.lshr(trailingZeros);
3442 // Change the exponent from 2^e to 10^e.
3445 } else if (exp > 0) {
// Positive binary exponent: widen by exp bits and shift it into the integer.
3447 significand = significand.zext(semantics->precision + exp);
3448 significand <<= exp;
3450 } else { /* exp < 0 */
3453 // We transform this using the identity:
3454 // (N)(2^-e) == (N)(5^e)(10^-e)
3455 // This means we have to multiply N (the significand) by 5^e.
3456 // To avoid overflow, we have to operate on numbers large
3457 // enough to store N * 5^e:
3458 // log2(N * 5^e) == log2(N) + e * log2(5)
3459 // <= semantics->precision + e * 137 / 59
3460 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
// NOTE(review): texp is presumably the magnitude of the negative exponent;
// its definition is not visible here.
3462 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3464 // Multiply significand by 5^e.
3465 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3466 significand = significand.zext(precision);
3467 APInt five_to_the_i(precision, 5);
3469 if (texp & 1) significand *= five_to_the_i;
3473 five_to_the_i *= five_to_the_i;
3477 AdjustToPrecision(significand, exp, FormatPrecision);
3479 llvm::SmallVector<char, 256> buffer;
3482 unsigned precision = significand.getBitWidth();
3483 APInt ten(precision, 10);
3484 APInt digit(precision, 0);
// Peel decimal digits off the low end, so buffer ends up least significant
// digit first.
3486 bool inTrail = true;
3487 while (significand != 0) {
3488 // digit <- significand % 10
3489 // significand <- significand / 10
3490 APInt::udivrem(significand, ten, significand, digit);
3492 unsigned d = digit.getZExtValue();
3494 // Drop trailing zeros.
3495 if (inTrail && !d) exp++;
3497 buffer.push_back((char) ('0' + d));
3502 assert(!buffer.empty() && "no characters in buffer!");
3504 // Drop down to FormatPrecision.
3505 // TODO: don't do more precise calculations above than are required.
3506 AdjustToPrecision(buffer, exp, FormatPrecision);
3508 unsigned NDigits = buffer.size();
3510 // Check whether we should use scientific notation.
3511 bool FormatScientific;
3512 if (!FormatMaxPadding)
3513 FormatScientific = true;
3518 // But we shouldn't make the number look more precise than it is.
3519 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3520 NDigits + (unsigned) exp > FormatPrecision);
3522 // Power of the most significant digit.
3523 int MSD = exp + (int) (NDigits - 1);
3526 FormatScientific = false;
3528 // 765e-5 == 0.00765
3530 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3535 // Scientific formatting is pretty straightforward.
3536 if (FormatScientific) {
// Re-express exp relative to the most significant digit, which is emitted
// first (it is the last buffer entry).
3537 exp += (NDigits - 1);
3539 Str.push_back(buffer[NDigits-1]);
3544 for (unsigned I = 1; I != NDigits; ++I)
3545 Str.push_back(buffer[NDigits-1-I]);
3548 Str.push_back(exp >= 0 ? '+' : '-');
3549 if (exp < 0) exp = -exp;
// Exponent digits are collected least significant first and then emitted in
// reverse.
3550 SmallVector<char, 6> expbuf;
3552 expbuf.push_back((char) ('0' + (exp % 10)));
3555 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3556 Str.push_back(expbuf[E-1-I]);
3560 // Non-scientific, positive exponents.
3562 for (unsigned I = 0; I != NDigits; ++I)
3563 Str.push_back(buffer[NDigits-1-I]);
3564 for (unsigned I = 0; I != (unsigned) exp; ++I)
3569 // Non-scientific, negative exponents.
3571 // The number of digits to the left of the decimal point.
3572 int NWholeDigits = exp + (int) NDigits;
3575 if (NWholeDigits > 0) {
3576 for (; I != (unsigned) NWholeDigits; ++I)
3577 Str.push_back(buffer[NDigits-I-1]);
3580 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3584 for (unsigned Z = 1; Z != NZeros; ++Z)
3588 for (; I != NDigits; ++I)
3589 Str.push_back(buffer[NDigits-I-1]);
3592 bool APFloat::getExactInverse(APFloat *inv) const {
3593 // We can only guarantee the existence of an exact inverse for IEEE floats.
3594 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3595 semantics != &IEEEdouble && semantics != &IEEEquad)
3598 // Special floats and denormals have no exact inverse.
3599 if (category != fcNormal)
3602 // Check that the number is a power of two by making sure that only the
3603 // integer bit is set in the significand.
3604 if (significandLSB() != semantics->precision - 1)
3608 APFloat reciprocal(*semantics, 1ULL);
3609 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3612 // Avoid multiplication with a denormal, it is not safe on all platforms and
3613 // may be slower than a normal division.
3614 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3617 assert(reciprocal.category == fcNormal &&
3618 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);