1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MathExtras.h"
// Packs a pair of category values into one integer (lhs * 4 + rhs) so that a
// single switch statement can dispatch on both operands' categories at once,
// as the *Specials routines in this file do.
// NOTE(review): the factor of 4 presumably relies on category values lying in
// the range 0-3 -- confirm against the fltCategory declaration.
27 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29 /* Assumed in hexadecimal significand parsing, and conversion to
30 hexadecimal strings. */
31 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
32 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
36 /* Represents floating point arithmetic semantics. */
38 /* The largest E such that 2^E is representable; this matches the
39 definition of IEEE 754. */
40 exponent_t maxExponent;
42 /* The smallest E such that 2^E is a normalized number; this
43 matches the definition of IEEE 754. */
44 exponent_t minExponent;
46 /* Number of bits in the significand. This includes the integer
48 unsigned int precision;
50 /* True if arithmetic is supported. */
51 unsigned int arithmeticOK;
// Semantics tables for the supported formats.  Following the order in which
// the fltSemantics members are declared, each initializer reads
//   { maxExponent, minExponent, precision, arithmeticOK }.
54 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
55 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
56 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
57 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
58 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
// Placeholder semantics: every numeric field is zero.
59 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
61 // The PowerPC format consists of two doubles. It does not map cleanly
62 // onto the usual format above. For now only storage of constants of
63 // this type is supported, no arithmetic.
// arithmeticOK is false here, so assertArithmeticOK() rejects these semantics.
64 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
66 /* A tight upper bound on number of parts required to hold the value
69 power * 815 / (351 * integerPartWidth) + 1
71 However, whilst the result may require only this many parts,
72 because we are multiplying two values to get it, the
73 multiplication may require an extra part with the excess part
74 being zero (consider the trivial case of 1 * 1, tcFullMultiply
75 requires two parts to hold the single-part result). So we add an
76 extra one to guarantee enough space whilst multiplying. */
// Sizing limits for the power-of-five scratch buffers.  maxExponent and
// maxPrecision are the largest values appearing in the semantics tables in
// this file (both match the IEEEquad entry).
77 const unsigned int maxExponent = 16383;
78 const unsigned int maxPrecision = 113;
// Exponent bound used only to size maxPowerOfFiveParts below.
79 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
// The bound quoted in the comment above (power * 815 / (351 * integerPartWidth)
// + 1) plus the one extra part needed while multiplying, hence the leading 2.
80 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
81 / (351 * integerPartWidth));
84 /* A bunch of private, handy routines. */
// Returns the number of integerParts needed to hold BITS bits, i.e. BITS
// divided by integerPartWidth and rounded up.
86 static inline unsigned int
87 partCountForBits(unsigned int bits)
89 return ((bits) + integerPartWidth - 1) / integerPartWidth;
92 /* Returns 0U-9U. Return values >= 10U are not digits. */
93 static inline unsigned int
94 decDigitValue(unsigned int c)
100 hexDigitValue(unsigned int c)
120 assertArithmeticOK(const llvm::fltSemantics &semantics) {
121 assert(semantics.arithmeticOK &&
122 "Compile-time arithmetic does not support these semantics");
125 /* Return the value of a decimal exponent of the form
128 If the exponent overflows, returns a large exponent with the
131 readExponent(StringRef::iterator begin, StringRef::iterator end)
134 unsigned int absExponent;
135 const unsigned int overlargeExponent = 24000; /* FIXME. */
136 StringRef::iterator p = begin;
138 assert(p != end && "Exponent has no digits");
140 isNegative = (*p == '-');
141 if (*p == '-' || *p == '+') {
143 assert(p != end && "Exponent has no digits");
146 absExponent = decDigitValue(*p++);
147 assert(absExponent < 10U && "Invalid character in exponent");
149 for (; p != end; ++p) {
152 value = decDigitValue(*p);
153 assert(value < 10U && "Invalid character in exponent");
155 value += absExponent * 10;
156 if (absExponent >= overlargeExponent) {
157 absExponent = overlargeExponent;
158 p = end; /* outwit assert below */
164 assert(p == end && "Invalid exponent in exponent");
167 return -(int) absExponent;
169 return (int) absExponent;
172 /* This is ugly and needs cleaning up, but I don't immediately see
173 how whilst remaining safe. */
175 totalExponent(StringRef::iterator p, StringRef::iterator end,
176 int exponentAdjustment)
178 int unsignedExponent;
179 bool negative, overflow;
182 assert(p != end && "Exponent has no digits");
184 negative = *p == '-';
185 if (*p == '-' || *p == '+') {
187 assert(p != end && "Exponent has no digits");
190 unsignedExponent = 0;
192 for (; p != end; ++p) {
195 value = decDigitValue(*p);
196 assert(value < 10U && "Invalid character in exponent");
198 unsignedExponent = unsignedExponent * 10 + value;
199 if (unsignedExponent > 32767)
203 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
207 exponent = unsignedExponent;
209 exponent = -exponent;
210 exponent += exponentAdjustment;
211 if (exponent > 32767 || exponent < -32768)
216 exponent = negative ? -32768: 32767;
221 static StringRef::iterator
222 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
223 StringRef::iterator *dot)
225 StringRef::iterator p = begin;
227 while (*p == '0' && p != end)
233 assert(end - begin != 1 && "Significand has no digits");
235 while (*p == '0' && p != end)
242 /* Given a normal decimal floating point number of the form
246 where the decimal point and exponent are optional, fill out the
247 structure D. Exponent is appropriate if the significand is
248 treated as an integer, and normalizedExponent if the significand
249 is taken to have the decimal point after a single leading
252 If the value is zero, D->firstSigDigit points to a non-digit, and
253 the return exponent is zero.
256 const char *firstSigDigit;
257 const char *lastSigDigit;
259 int normalizedExponent;
263 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
266 StringRef::iterator dot = end;
267 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
269 D->firstSigDigit = p;
271 D->normalizedExponent = 0;
273 for (; p != end; ++p) {
275 assert(dot == end && "String contains multiple dots");
280 if (decDigitValue(*p) >= 10U)
285 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
286 assert(p != begin && "Significand has no digits");
287 assert((dot == end || p - begin != 1) && "Significand has no digits");
289 /* p points to the first non-digit in the string */
290 D->exponent = readExponent(p + 1, end);
292 /* Implied decimal point? */
297 /* If number is all zeroes accept any exponent. */
298 if (p != D->firstSigDigit) {
299 /* Drop insignificant trailing zeroes. */
304 while (p != begin && *p == '0');
305 while (p != begin && *p == '.');
308 /* Adjust the exponents for any decimal point. */
309 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
310 D->normalizedExponent = (D->exponent +
311 static_cast<exponent_t>((p - D->firstSigDigit)
312 - (dot > D->firstSigDigit && dot < p)));
318 /* Return the trailing fraction of a hexadecimal number.
319 DIGITVALUE is the first hex digit of the fraction, P points to
322 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
323 unsigned int digitValue)
325 unsigned int hexDigit;
327 /* If the first trailing digit isn't 0 or 8 we can work out the
328 fraction immediately. */
330 return lfMoreThanHalf;
331 else if (digitValue < 8 && digitValue > 0)
332 return lfLessThanHalf;
334 /* Otherwise we need to find the first non-zero digit. */
338 assert(p != end && "Invalid trailing hexadecimal fraction!");
340 hexDigit = hexDigitValue(*p);
342 /* If we ran off the end it is exactly zero or one-half, otherwise
345 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
347 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
350 /* Return the fraction lost were a bignum truncated losing the least
351 significant BITS bits. */
353 lostFractionThroughTruncation(const integerPart *parts,
354 unsigned int partCount,
359 lsb = APInt::tcLSB(parts, partCount);
361 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
363 return lfExactlyZero;
365 return lfExactlyHalf;
366 if (bits <= partCount * integerPartWidth &&
367 APInt::tcExtractBit(parts, bits - 1))
368 return lfMoreThanHalf;
370 return lfLessThanHalf;
373 /* Shift DST right BITS bits noting lost fraction. */
// DST is a bignum of PARTS integerParts and is modified in place.  The return
// value is the classification of the shifted-out bits produced by
// lostFractionThroughTruncation.
375 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
377 lostFraction lost_fraction;
// Classify the bits about to be discarded first; the shift below destroys them.
379 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
381 APInt::tcShiftRight(dst, parts, bits);
383 return lost_fraction;
386 /* Combine the effect of two lost fractions. */
// MORESIGNIFICANT is the loss recorded for the higher-order bits and
// LESSSIGNIFICANT the loss recorded below them.  A non-zero lower-order loss
// turns an exact classification into the next inexact one: exactly-zero
// becomes less-than-half and exactly-half becomes more-than-half.  The two
// inexact classifications are returned unchanged.
388 combineLostFractions(lostFraction moreSignificant,
389 lostFraction lessSignificant)
391 if (lessSignificant != lfExactlyZero) {
392 if (moreSignificant == lfExactlyZero)
393 moreSignificant = lfLessThanHalf;
394 else if (moreSignificant == lfExactlyHalf)
395 moreSignificant = lfMoreThanHalf;
398 return moreSignificant;
401 /* The error from the true value, in half-ulps, on multiplying two
402 floating point numbers, which differ from the value they
403 approximate by at most HUERR1 and HUERR2 half-ulps, is strictly less
404 than the returned value.
406 See "How to Read Floating Point Numbers Accurately" by William D
409 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
411 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
413 if (HUerr1 + HUerr2 == 0)
414 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
416 return inexactMultiply + 2 * (HUerr1 + HUerr2);
419 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
420 when the least significant BITS are truncated. BITS cannot be
423 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
425 unsigned int count, partBits;
426 integerPart part, boundary;
431 count = bits / integerPartWidth;
432 partBits = bits % integerPartWidth + 1;
434 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
437 boundary = (integerPart) 1 << (partBits - 1);
442 if (part - boundary <= boundary - part)
443 return part - boundary;
445 return boundary - part;
448 if (part == boundary) {
451 return ~(integerPart) 0; /* A lot. */
454 } else if (part == boundary - 1) {
457 return ~(integerPart) 0; /* A lot. */
462 return ~(integerPart) 0; /* A lot. */
465 /* Place pow(5, power) in DST, and return the number of parts used.
466 DST must be at least one part larger than size of the answer. */
468 powerOf5(integerPart *dst, unsigned int power)
470 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
472 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
473 pow5s[0] = 78125 * 5;
475 unsigned int partsCount[16] = { 1 };
476 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
478 assert(power <= maxExponent);
483 *p1 = firstEightPowers[power & 7];
489 for (unsigned int n = 0; power; power >>= 1, n++) {
494 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
496 pc = partsCount[n - 1];
497 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
499 if (pow5[pc - 1] == 0)
507 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
509 if (p2[result - 1] == 0)
512 /* Now result is in p1 with partsCount parts and p2 is scratch
514 tmp = p1, p1 = p2, p2 = tmp;
521 APInt::tcAssign(dst, p1, result);
526 /* Zero at the end to avoid modular arithmetic when adding one; used
527 when rounding up during hexadecimal output. */
// Each digit table therefore holds 17 characters: index 16 repeats '0'.
528 static const char hexDigitsLower[] = "0123456789abcdef0";
529 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
// Lower- and upper-case spellings of infinity and NaN.
530 static const char infinityL[] = "infinity";
531 static const char infinityU[] = "INFINITY";
532 static const char NaNL[] = "nan";
533 static const char NaNU[] = "NAN";
535 /* Write out an integerPart in hexadecimal, starting with the most
536 significant nibble. Write out exactly COUNT hexdigits, return
539 partAsHex (char *dst, integerPart part, unsigned int count,
540 const char *hexDigitChars)
542 unsigned int result = count;
544 assert(count != 0 && count <= integerPartWidth / 4);
546 part >>= (integerPartWidth - 4 * count);
548 dst[count] = hexDigitChars[part & 0xf];
555 /* Write out an unsigned decimal integer. */
557 writeUnsignedDecimal (char *dst, unsigned int n)
573 /* Write out a signed decimal integer. */
575 writeSignedDecimal (char *dst, int value)
579 dst = writeUnsignedDecimal(dst, -(unsigned) value);
581 dst = writeUnsignedDecimal(dst, value);
588 APFloat::initialize(const fltSemantics *ourSemantics)
592 semantics = ourSemantics;
595 significand.parts = new integerPart[count];
599 APFloat::freeSignificand()
602 delete [] significand.parts;
606 APFloat::assign(const APFloat &rhs)
608 assert(semantics == rhs.semantics);
611 category = rhs.category;
612 exponent = rhs.exponent;
614 exponent2 = rhs.exponent2;
615 if (category == fcNormal || category == fcNaN)
616 copySignificand(rhs);
620 APFloat::copySignificand(const APFloat &rhs)
622 assert(category == fcNormal || category == fcNaN);
623 assert(rhs.partCount() >= partCount());
625 APInt::tcAssign(significandParts(), rhs.significandParts(),
629 /* Make this number a NaN, with an arbitrary but deterministic value
630 for the significand. If double or longer, this is a signalling NaN,
631 which may not be ideal. If float, this is QNaN(0). */
632 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
637 integerPart *significand = significandParts();
638 unsigned numParts = partCount();
640 // Set the significand bits to the fill.
641 if (!fill || fill->getNumWords() < numParts)
642 APInt::tcSet(significand, 0, numParts);
644 APInt::tcAssign(significand, fill->getRawData(),
645 std::min(fill->getNumWords(), numParts));
647 // Zero out the excess bits of the significand.
648 unsigned bitsToPreserve = semantics->precision - 1;
649 unsigned part = bitsToPreserve / 64;
650 bitsToPreserve %= 64;
651 significand[part] &= ((1ULL << bitsToPreserve) - 1);
652 for (part++; part != numParts; ++part)
653 significand[part] = 0;
656 unsigned QNaNBit = semantics->precision - 2;
659 // We always have to clear the QNaN bit to make it an SNaN.
660 APInt::tcClearBit(significand, QNaNBit);
662 // If there are no bits set in the payload, we have to set
663 // *something* to make it a NaN instead of an infinity;
664 // conventionally, this is the next bit down from the QNaN bit.
665 if (APInt::tcIsZero(significand, numParts))
666 APInt::tcSetBit(significand, QNaNBit - 1);
668 // We always have to set the QNaN bit to make it a QNaN.
669 APInt::tcSetBit(significand, QNaNBit);
672 // For x87 extended precision, we want to make a NaN, not a
673 // pseudo-NaN. Maybe we should expose the ability to make
675 if (semantics == &APFloat::x87DoubleExtended)
676 APInt::tcSetBit(significand, QNaNBit + 1);
679 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
681 APFloat value(Sem, uninitialized);
682 value.makeNaN(SNaN, Negative, fill);
687 APFloat::operator=(const APFloat &rhs)
690 if (semantics != rhs.semantics) {
692 initialize(rhs.semantics);
701 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
704 if (semantics != rhs.semantics ||
705 category != rhs.category ||
708 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
711 if (category==fcZero || category==fcInfinity)
713 else if (category==fcNormal && exponent!=rhs.exponent)
715 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
716 exponent2!=rhs.exponent2)
720 const integerPart* p=significandParts();
721 const integerPart* q=rhs.significandParts();
722 for (; i>0; i--, p++, q++) {
730 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
731 : exponent2(0), sign2(0) {
732 assertArithmeticOK(ourSemantics);
733 initialize(&ourSemantics);
736 exponent = ourSemantics.precision - 1;
737 significandParts()[0] = value;
738 normalize(rmNearestTiesToEven, lfExactlyZero);
741 APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
742 assertArithmeticOK(ourSemantics);
743 initialize(&ourSemantics);
748 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
749 : exponent2(0), sign2(0) {
750 assertArithmeticOK(ourSemantics);
751 // Allocates storage if necessary but does not initialize it.
752 initialize(&ourSemantics);
755 APFloat::APFloat(const fltSemantics &ourSemantics,
756 fltCategory ourCategory, bool negative)
757 : exponent2(0), sign2(0) {
758 assertArithmeticOK(ourSemantics);
759 initialize(&ourSemantics);
760 category = ourCategory;
762 if (category == fcNormal)
764 else if (ourCategory == fcNaN)
768 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
769 : exponent2(0), sign2(0) {
770 assertArithmeticOK(ourSemantics);
771 initialize(&ourSemantics);
772 convertFromString(text, rmNearestTiesToEven);
775 APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
776 initialize(rhs.semantics);
785 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
786 void APFloat::Profile(FoldingSetNodeID& ID) const {
787 ID.Add(bitcastToAPInt());
791 APFloat::partCount() const
793 return partCountForBits(semantics->precision + 1);
797 APFloat::semanticsPrecision(const fltSemantics &semantics)
799 return semantics.precision;
803 APFloat::significandParts() const
805 return const_cast<APFloat *>(this)->significandParts();
809 APFloat::significandParts()
811 assert(category == fcNormal || category == fcNaN);
814 return significand.parts;
816 return &significand.part;
820 APFloat::zeroSignificand()
823 APInt::tcSet(significandParts(), 0, partCount());
826 /* Increment an fcNormal floating point number's significand. */
828 APFloat::incrementSignificand()
832 carry = APInt::tcIncrement(significandParts(), partCount());
834 /* Our callers should never cause us to overflow. */
839 /* Add the significand of the RHS. Returns the carry flag. */
841 APFloat::addSignificand(const APFloat &rhs)
845 parts = significandParts();
847 assert(semantics == rhs.semantics);
848 assert(exponent == rhs.exponent);
850 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
853 /* Subtract the significand of the RHS with a borrow flag. Returns
856 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
860 parts = significandParts();
862 assert(semantics == rhs.semantics);
863 assert(exponent == rhs.exponent);
865 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
869 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
870 on to the full-precision result of the multiplication. Returns the
873 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
875 unsigned int omsb; // One, not zero, based MSB.
876 unsigned int partsCount, newPartsCount, precision;
877 integerPart *lhsSignificand;
878 integerPart scratch[4];
879 integerPart *fullSignificand;
880 lostFraction lost_fraction;
883 assert(semantics == rhs.semantics);
885 precision = semantics->precision;
886 newPartsCount = partCountForBits(precision * 2);
888 if (newPartsCount > 4)
889 fullSignificand = new integerPart[newPartsCount];
891 fullSignificand = scratch;
893 lhsSignificand = significandParts();
894 partsCount = partCount();
896 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
897 rhs.significandParts(), partsCount, partsCount);
899 lost_fraction = lfExactlyZero;
900 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
901 exponent += rhs.exponent;
904 Significand savedSignificand = significand;
905 const fltSemantics *savedSemantics = semantics;
906 fltSemantics extendedSemantics;
908 unsigned int extendedPrecision;
910 /* Normalize our MSB. */
911 extendedPrecision = precision + precision - 1;
912 if (omsb != extendedPrecision) {
913 APInt::tcShiftLeft(fullSignificand, newPartsCount,
914 extendedPrecision - omsb);
915 exponent -= extendedPrecision - omsb;
918 /* Create new semantics. */
919 extendedSemantics = *semantics;
920 extendedSemantics.precision = extendedPrecision;
922 if (newPartsCount == 1)
923 significand.part = fullSignificand[0];
925 significand.parts = fullSignificand;
926 semantics = &extendedSemantics;
928 APFloat extendedAddend(*addend);
929 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
930 assert(status == opOK);
932 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
934 /* Restore our state. */
935 if (newPartsCount == 1)
936 fullSignificand[0] = significand.part;
937 significand = savedSignificand;
938 semantics = savedSemantics;
940 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
943 exponent -= (precision - 1);
945 if (omsb > precision) {
946 unsigned int bits, significantParts;
949 bits = omsb - precision;
950 significantParts = partCountForBits(omsb);
951 lf = shiftRight(fullSignificand, significantParts, bits);
952 lost_fraction = combineLostFractions(lf, lost_fraction);
956 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
958 if (newPartsCount > 4)
959 delete [] fullSignificand;
961 return lost_fraction;
964 /* Divide our significand by that of RHS, leaving the quotient in ours.  Returns the lost fraction. */
966 APFloat::divideSignificand(const APFloat &rhs)
968 unsigned int bit, i, partsCount;
969 const integerPart *rhsSignificand;
970 integerPart *lhsSignificand, *dividend, *divisor;
971 integerPart scratch[4];
972 lostFraction lost_fraction;
974 assert(semantics == rhs.semantics);
976 lhsSignificand = significandParts();
977 rhsSignificand = rhs.significandParts();
978 partsCount = partCount();
981 dividend = new integerPart[partsCount * 2];
985 divisor = dividend + partsCount;
987 /* Copy the dividend and divisor as they will be modified in-place. */
988 for (i = 0; i < partsCount; i++) {
989 dividend[i] = lhsSignificand[i];
990 divisor[i] = rhsSignificand[i];
991 lhsSignificand[i] = 0;
994 exponent -= rhs.exponent;
996 unsigned int precision = semantics->precision;
998 /* Normalize the divisor. */
999 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1002 APInt::tcShiftLeft(divisor, partsCount, bit);
1005 /* Normalize the dividend. */
1006 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1009 APInt::tcShiftLeft(dividend, partsCount, bit);
1012 /* Ensure the dividend >= divisor initially for the loop below.
1013 Incidentally, this means that the division loop below is
1014 guaranteed to set the integer bit to one. */
1015 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1017 APInt::tcShiftLeft(dividend, partsCount, 1);
1018 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1021 /* Long division. */
1022 for (bit = precision; bit; bit -= 1) {
1023 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1024 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1025 APInt::tcSetBit(lhsSignificand, bit - 1);
1028 APInt::tcShiftLeft(dividend, partsCount, 1);
1031 /* Figure out the lost fraction. */
1032 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1035 lost_fraction = lfMoreThanHalf;
1037 lost_fraction = lfExactlyHalf;
1038 else if (APInt::tcIsZero(dividend, partsCount))
1039 lost_fraction = lfExactlyZero;
1041 lost_fraction = lfLessThanHalf;
1046 return lost_fraction;
1050 APFloat::significandMSB() const
1052 return APInt::tcMSB(significandParts(), partCount());
1056 APFloat::significandLSB() const
1058 return APInt::tcLSB(significandParts(), partCount());
1061 /* Note that a zero result is NOT normalized to fcZero. */
1063 APFloat::shiftSignificandRight(unsigned int bits)
1065 /* Our exponent should not overflow. */
1066 assert((exponent_t) (exponent + bits) >= exponent);
1070 return shiftRight(significandParts(), partCount(), bits);
1073 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1075 APFloat::shiftSignificandLeft(unsigned int bits)
1077 assert(bits < semantics->precision);
1080 unsigned int partsCount = partCount();
1082 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1085 assert(!APInt::tcIsZero(significandParts(), partsCount));
1090 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1094 assert(semantics == rhs.semantics);
1095 assert(category == fcNormal);
1096 assert(rhs.category == fcNormal);
1098 compare = exponent - rhs.exponent;
1100 /* If exponents are equal, do an unsigned bignum comparison of the
1103 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1107 return cmpGreaterThan;
1108 else if (compare < 0)
1114 /* Handle overflow. Sign is preserved. We either become infinity or
1115 the largest finite number. */
// The result is infinity for both round-to-nearest modes, and for a directed
// rounding mode that points away from zero for the current sign (toward
// positive when the sign is clear, toward negative when it is set).  That
// path reports opOverflow | opInexact.
1117 APFloat::handleOverflow(roundingMode rounding_mode)
1120 if (rounding_mode == rmNearestTiesToEven ||
1121 rounding_mode == rmNearestTiesToAway ||
1122 (rounding_mode == rmTowardPositive && !sign) ||
1123 (rounding_mode == rmTowardNegative && sign)) {
1124 category = fcInfinity;
1125 return (opStatus) (opOverflow | opInexact);
1128 /* Otherwise we become the largest finite number. */
// Maximum exponent for these semantics; the helper call is presumably what
// sets the low `precision` bits of the significand -- confirm against
// APInt::tcSetLeastSignificantBits.
1129 category = fcNormal;
1130 exponent = semantics->maxExponent;
1131 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1132 semantics->precision);
1137 /* Returns TRUE if, when truncating the current number, with BIT the
1138 new LSB, with the given lost fraction and rounding mode, the result
1139 would need to be rounded away from zero (i.e., by increasing the
1140 significand). This routine must work for fcZero of both signs, and
1141 fcNormal numbers. */
1143 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1144 lostFraction lost_fraction,
1145 unsigned int bit) const
1147 /* NaNs and infinities should not have lost fractions. */
1148 assert(category == fcNormal || category == fcZero);
1150 /* Current callers never pass this so we don't handle it. */
1151 assert(lost_fraction != lfExactlyZero);
1153 switch (rounding_mode) {
1154 case rmNearestTiesToAway:
1155 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1157 case rmNearestTiesToEven:
1158 if (lost_fraction == lfMoreThanHalf)
1161 /* Our zeroes don't have a significand to test. */
1162 if (lost_fraction == lfExactlyHalf && category != fcZero)
1163 return APInt::tcExtractBit(significandParts(), bit);
1170 case rmTowardPositive:
1171 return sign == false;
1173 case rmTowardNegative:
1174 return sign == true;
1176 llvm_unreachable("Invalid rounding mode found");
1180 APFloat::normalize(roundingMode rounding_mode,
1181 lostFraction lost_fraction)
1183 unsigned int omsb; /* One, not zero, based MSB. */
1186 if (category != fcNormal)
1189 /* Before rounding normalize the exponent of fcNormal numbers. */
1190 omsb = significandMSB() + 1;
1193 /* OMSB is numbered from 1. We want to place it in the integer
1194 bit numbered PRECISION if possible, with a compensating change in
1196 exponentChange = omsb - semantics->precision;
1198 /* If the resulting exponent is too high, overflow according to
1199 the rounding mode. */
1200 if (exponent + exponentChange > semantics->maxExponent)
1201 return handleOverflow(rounding_mode);
1203 /* Subnormal numbers have exponent minExponent, and their MSB
1204 is forced based on that. */
1205 if (exponent + exponentChange < semantics->minExponent)
1206 exponentChange = semantics->minExponent - exponent;
1208 /* Shifting left is easy as we don't lose precision. */
1209 if (exponentChange < 0) {
1210 assert(lost_fraction == lfExactlyZero);
1212 shiftSignificandLeft(-exponentChange);
1217 if (exponentChange > 0) {
1220 /* Shift right and capture any new lost fraction. */
1221 lf = shiftSignificandRight(exponentChange);
1223 lost_fraction = combineLostFractions(lf, lost_fraction);
1225 /* Keep OMSB up-to-date. */
1226 if (omsb > (unsigned) exponentChange)
1227 omsb -= exponentChange;
1233 /* Now round the number according to rounding_mode given the lost
1236 /* As specified in IEEE 754, since we do not trap we do not report
1237 underflow for exact results. */
1238 if (lost_fraction == lfExactlyZero) {
1239 /* Canonicalize zeroes. */
1246 /* Increment the significand if we're rounding away from zero. */
1247 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1249 exponent = semantics->minExponent;
1251 incrementSignificand();
1252 omsb = significandMSB() + 1;
1254 /* Did the significand increment overflow? */
1255 if (omsb == (unsigned) semantics->precision + 1) {
1256 /* Renormalize by incrementing the exponent and shifting our
1257 significand right one. However if we already have the
1258 maximum exponent we overflow to infinity. */
1259 if (exponent == semantics->maxExponent) {
1260 category = fcInfinity;
1262 return (opStatus) (opOverflow | opInexact);
1265 shiftSignificandRight(1);
1271 /* The normal case - we were and are not denormal, and any
1272 significand increment above didn't overflow. */
1273 if (omsb == semantics->precision)
1276 /* We have a non-zero denormal. */
1277 assert(omsb < semantics->precision);
1279 /* Canonicalize zeroes. */
1283 /* The fcZero case is a denormal that underflowed to zero. */
1284 return (opStatus) (opUnderflow | opInexact);
1288 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1290 switch (convolve(category, rhs.category)) {
1292 llvm_unreachable(0);
1294 case convolve(fcNaN, fcZero):
1295 case convolve(fcNaN, fcNormal):
1296 case convolve(fcNaN, fcInfinity):
1297 case convolve(fcNaN, fcNaN):
1298 case convolve(fcNormal, fcZero):
1299 case convolve(fcInfinity, fcNormal):
1300 case convolve(fcInfinity, fcZero):
1303 case convolve(fcZero, fcNaN):
1304 case convolve(fcNormal, fcNaN):
1305 case convolve(fcInfinity, fcNaN):
1307 copySignificand(rhs);
1310 case convolve(fcNormal, fcInfinity):
1311 case convolve(fcZero, fcInfinity):
1312 category = fcInfinity;
1313 sign = rhs.sign ^ subtract;
1316 case convolve(fcZero, fcNormal):
1318 sign = rhs.sign ^ subtract;
1321 case convolve(fcZero, fcZero):
1322 /* Sign depends on rounding mode; handled by caller. */
1325 case convolve(fcInfinity, fcInfinity):
1326 /* Differently signed infinities can only be validly
1328 if (((sign ^ rhs.sign)!=0) != subtract) {
1335 case convolve(fcNormal, fcNormal):
1340 /* Add or subtract two normal numbers. */
1342 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1345 lostFraction lost_fraction;
1348 /* Determine if the operation on the absolute values is effectively
1349 an addition or subtraction. */
1350 subtract ^= (sign ^ rhs.sign) ? true : false;
1352 /* Are we bigger exponent-wise than the RHS? */
1353 bits = exponent - rhs.exponent;
1355 /* Subtraction is more subtle than one might naively expect. */
1357 APFloat temp_rhs(rhs);
1361 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1362 lost_fraction = lfExactlyZero;
1363 } else if (bits > 0) {
1364 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1365 shiftSignificandLeft(1);
1368 lost_fraction = shiftSignificandRight(-bits - 1);
1369 temp_rhs.shiftSignificandLeft(1);
1374 carry = temp_rhs.subtractSignificand
1375 (*this, lost_fraction != lfExactlyZero);
1376 copySignificand(temp_rhs);
1379 carry = subtractSignificand
1380 (temp_rhs, lost_fraction != lfExactlyZero);
1383 /* Invert the lost fraction - it was on the RHS and
1385 if (lost_fraction == lfLessThanHalf)
1386 lost_fraction = lfMoreThanHalf;
1387 else if (lost_fraction == lfMoreThanHalf)
1388 lost_fraction = lfLessThanHalf;
1390 /* The code above is intended to ensure that no borrow is
1396 APFloat temp_rhs(rhs);
1398 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1399 carry = addSignificand(temp_rhs);
1401 lost_fraction = shiftSignificandRight(-bits);
1402 carry = addSignificand(rhs);
1405 /* We have a guard bit; generating a carry cannot happen. */
1410 return lost_fraction;
1414 APFloat::multiplySpecials(const APFloat &rhs)
1416 switch (convolve(category, rhs.category)) {
1418 llvm_unreachable(0);
1420 case convolve(fcNaN, fcZero):
1421 case convolve(fcNaN, fcNormal):
1422 case convolve(fcNaN, fcInfinity):
1423 case convolve(fcNaN, fcNaN):
1426 case convolve(fcZero, fcNaN):
1427 case convolve(fcNormal, fcNaN):
1428 case convolve(fcInfinity, fcNaN):
1430 copySignificand(rhs);
1433 case convolve(fcNormal, fcInfinity):
1434 case convolve(fcInfinity, fcNormal):
1435 case convolve(fcInfinity, fcInfinity):
1436 category = fcInfinity;
1439 case convolve(fcZero, fcNormal):
1440 case convolve(fcNormal, fcZero):
1441 case convolve(fcZero, fcZero):
1445 case convolve(fcZero, fcInfinity):
1446 case convolve(fcInfinity, fcZero):
1450 case convolve(fcNormal, fcNormal):
1456 APFloat::divideSpecials(const APFloat &rhs)
1458 switch (convolve(category, rhs.category)) {
1460 llvm_unreachable(0);
1462 case convolve(fcNaN, fcZero):
1463 case convolve(fcNaN, fcNormal):
1464 case convolve(fcNaN, fcInfinity):
1465 case convolve(fcNaN, fcNaN):
1466 case convolve(fcInfinity, fcZero):
1467 case convolve(fcInfinity, fcNormal):
1468 case convolve(fcZero, fcInfinity):
1469 case convolve(fcZero, fcNormal):
1472 case convolve(fcZero, fcNaN):
1473 case convolve(fcNormal, fcNaN):
1474 case convolve(fcInfinity, fcNaN):
1476 copySignificand(rhs);
1479 case convolve(fcNormal, fcInfinity):
1483 case convolve(fcNormal, fcZero):
1484 category = fcInfinity;
1487 case convolve(fcInfinity, fcInfinity):
1488 case convolve(fcZero, fcZero):
1492 case convolve(fcNormal, fcNormal):
1498 APFloat::modSpecials(const APFloat &rhs)
1500 switch (convolve(category, rhs.category)) {
1502 llvm_unreachable(0);
1504 case convolve(fcNaN, fcZero):
1505 case convolve(fcNaN, fcNormal):
1506 case convolve(fcNaN, fcInfinity):
1507 case convolve(fcNaN, fcNaN):
1508 case convolve(fcZero, fcInfinity):
1509 case convolve(fcZero, fcNormal):
1510 case convolve(fcNormal, fcInfinity):
1513 case convolve(fcZero, fcNaN):
1514 case convolve(fcNormal, fcNaN):
1515 case convolve(fcInfinity, fcNaN):
1517 copySignificand(rhs);
1520 case convolve(fcNormal, fcZero):
1521 case convolve(fcInfinity, fcZero):
1522 case convolve(fcInfinity, fcNormal):
1523 case convolve(fcInfinity, fcInfinity):
1524 case convolve(fcZero, fcZero):
1528 case convolve(fcNormal, fcNormal):
1535 APFloat::changeSign()
1537 /* Look mummy, this one's easy. */
1542 APFloat::clearSign()
1544 /* So is this one. */
1549 APFloat::copySign(const APFloat &rhs)
1555 /* Normalized addition or subtraction. */
// Shared implementation behind add() and subtract(), which call it with the
// last argument false and true respectively. Parts of this definition (return
// type, the trailing parameter, braces, the final return) are not visible in
// this excerpt.
1557 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1562 assertArithmeticOK(*semantics);
// Give the category-based special-case handler the first try.
1564 fs = addOrSubtractSpecials(rhs, subtract);
1566 /* This return code means it was not a simple case. */
// opDivByZero is used here as a sentinel meaning that significand arithmetic
// is still required; it is not reported as a real division by zero, because
// fs is overwritten by normalize() below.
1567 if (fs == opDivByZero) {
1568 lostFraction lost_fraction;
1570 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1571 fs = normalize(rounding_mode, lost_fraction);
1573 /* Can only be zero if we lost no fraction. */
1574 assert(category != fcZero || lost_fraction == lfExactlyZero);
1577 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1578 positive zero unless rounding to minus infinity, except that
1579 adding two like-signed zeroes gives that zero. */
1580 if (category == fcZero) {
// The sign is recomputed when rhs is not a zero, or when the signs differ
// under addition / agree under subtraction: (sign == rhs.sign) == subtract is
// true exactly in those two sign combinations. Otherwise the existing sign
// is kept.
1581 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1582 sign = (rounding_mode == rmTowardNegative);
1588 /* Normalized addition. */
1590 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1592 return addOrSubtract(rhs, rounding_mode, false);
1595 /* Normalized subtraction. */
1597 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1599 return addOrSubtract(rhs, rounding_mode, true);
1602 /* Normalized multiply. */
1604 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1608 assertArithmeticOK(*semantics);
1610 fs = multiplySpecials(rhs);
1612 if (category == fcNormal) {
1613 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1614 fs = normalize(rounding_mode, lost_fraction);
1615 if (lost_fraction != lfExactlyZero)
1616 fs = (opStatus) (fs | opInexact);
1622 /* Normalized divide. */
1624 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1628 assertArithmeticOK(*semantics);
1630 fs = divideSpecials(rhs);
1632 if (category == fcNormal) {
1633 lostFraction lost_fraction = divideSignificand(rhs);
1634 fs = normalize(rounding_mode, lost_fraction);
1635 if (lost_fraction != lfExactlyZero)
1636 fs = (opStatus) (fs | opInexact);
1642 /* Normalized remainder. This is not currently correct in all cases. */
// Visible steps: V is divided by rhs, the quotient is rounded to an integer
// with round-to-nearest-even, converted back to floating point, multiplied by
// rhs, and the product is subtracted from this value. V, fs and ignored are
// declared on lines not visible in this excerpt; presumably V starts out as a
// copy of this value -- TODO confirm.
1644 APFloat::remainder(const APFloat &rhs)
// Saved so it can be written back to sign after the subtraction below.
1648 unsigned int origSign = sign;
1650 assertArithmeticOK(*semantics);
1651 fs = V.divide(rhs, rmNearestTiesToEven);
// The statement guarded by this test is not visible in this excerpt.
1652 if (fs == opDivByZero)
1655 int parts = partCount();
// Scratch buffer, sized by partCount(), that receives the integer quotient.
// NOTE(review): x comes from new[] and no delete[] is visible in this
// excerpt. If the statement under the opInvalidOp test below returns early,
// x must be released first -- confirm against the full file.
1656 integerPart *x = new integerPart[parts];
// Round the quotient to a signed integer (third argument true) using
// round-to-nearest-even.
1658 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1659 rmNearestTiesToEven, &ignored);
1660 if (fs==opInvalidOp)
// Turn the integer quotient back into a floating point value in V.
1663 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1664 rmNearestTiesToEven);
1665 assert(fs==opOK); // should always work
// V = quotient * rhs, then this value -= V.
1667 fs = V.multiply(rhs, rmNearestTiesToEven);
1668 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1670 fs = subtract(V, rmNearestTiesToEven);
1671 assert(fs==opOK || fs==opInexact); // likewise
// Any condition guarding this assignment is on a line not visible here.
1674 sign = origSign; // IEEE754 requires this
1679 /* Normalized llvm frem (C fmod).
1680 This is not currently correct in all cases. */
// Visible steps: after modSpecials() has handled the category combinations,
// the arithmetic path runs only when both operands are fcNormal. V is divided
// by rhs, the quotient is truncated toward zero to an integer, converted back,
// multiplied by rhs, and the product is subtracted from this value. V, fs and
// ignored are declared on lines not visible in this excerpt; presumably V
// starts out as a copy of this value -- TODO confirm.
1682 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1685 assertArithmeticOK(*semantics);
1686 fs = modSpecials(rhs);
1688 if (category == fcNormal && rhs.category == fcNormal) {
// Saved so it can be written back to sign after the subtraction below.
1690 unsigned int origSign = sign;
// The division itself always uses round-to-nearest-even, independent of the
// rounding_mode argument.
1692 fs = V.divide(rhs, rmNearestTiesToEven);
// The statement guarded by this test is not visible in this excerpt.
1693 if (fs == opDivByZero)
1696 int parts = partCount();
// Scratch buffer, sized by partCount(), that receives the integer quotient.
// NOTE(review): x comes from new[] and no delete[] is visible in this
// excerpt. If the statement under the opInvalidOp test below returns early,
// x must be released first -- confirm against the full file.
1697 integerPart *x = new integerPart[parts];
// rmTowardZero here is what distinguishes this from remainder(): the quotient
// is truncated rather than rounded to nearest.
1699 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1700 rmTowardZero, &ignored);
1701 if (fs==opInvalidOp)
// Turn the integer quotient back into a floating point value in V.
1704 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1705 rmNearestTiesToEven);
1706 assert(fs==opOK); // should always work
// Only these last two operations honour the caller-supplied rounding_mode.
1708 fs = V.multiply(rhs, rounding_mode);
1709 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1711 fs = subtract(V, rounding_mode);
1712 assert(fs==opOK || fs==opInexact); // likewise
// Any condition guarding this assignment is on a line not visible here.
1715 sign = origSign; // IEEE754 requires this
1721 /* Normalized fused-multiply-add. */
1723 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1724 const APFloat &addend,
1725 roundingMode rounding_mode)
1729 assertArithmeticOK(*semantics);
1731 /* Post-multiplication sign, before addition. */
1732 sign ^= multiplicand.sign;
1734 /* If and only if all arguments are normal do we need to do an
1735 extended-precision calculation. */
1736 if (category == fcNormal &&
1737 multiplicand.category == fcNormal &&
1738 addend.category == fcNormal) {
1739 lostFraction lost_fraction;
1741 lost_fraction = multiplySignificand(multiplicand, &addend);
1742 fs = normalize(rounding_mode, lost_fraction);
1743 if (lost_fraction != lfExactlyZero)
1744 fs = (opStatus) (fs | opInexact);
1746 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1747 positive zero unless rounding to minus infinity, except that
1748 adding two like-signed zeroes gives that zero. */
1749 if (category == fcZero && sign != addend.sign)
1750 sign = (rounding_mode == rmTowardNegative);
1752 fs = multiplySpecials(multiplicand);
1754 /* FS can only be opOK or opInvalidOp. There is no more work
1755 to do in the latter case. The IEEE-754R standard says it is
1756 implementation-defined in this case whether, if ADDEND is a
1757 quiet NaN, we raise invalid op; this implementation does so.
1759 If we need to do the addition we can do so with normal
1762 fs = addOrSubtract(addend, rounding_mode, false);
1768 /* Rounding-mode correct round to integral value. */
1769 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1771 assertArithmeticOK(*semantics);
1773 // If the exponent is large enough, we know that this value is already
1774 // integral, and the arithmetic below would potentially cause it to saturate
1775 // to +/-Inf. Bail out early instead.
1776 if (exponent+1 >= (int)semanticsPrecision(*semantics))
1779 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1780 // precision of our format, and then subtract it back off again. The choice
1781 // of rounding modes for the addition/subtraction determines the rounding mode
1782 // for our integral rounding as well.
1783 // NOTE: When the input value is negative, we do subtraction followed by
1784 // addition instead.
1785 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1786 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1787 APFloat MagicConstant(*semantics);
1788 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1789 rmNearestTiesToEven);
1790 MagicConstant.copySign(*this);
1795 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1796 bool inputSign = isNegative();
1798 fs = add(MagicConstant, rounding_mode);
1799 if (fs != opOK && fs != opInexact)
1802 fs = subtract(MagicConstant, rounding_mode);
1804 // Restore the input sign.
1805 if (inputSign != isNegative())
1812 /* Comparison requires normalized numbers. */
1814 APFloat::compare(const APFloat &rhs) const
1818 assertArithmeticOK(*semantics);
1819 assert(semantics == rhs.semantics);
1821 switch (convolve(category, rhs.category)) {
1823 llvm_unreachable(0);
1825 case convolve(fcNaN, fcZero):
1826 case convolve(fcNaN, fcNormal):
1827 case convolve(fcNaN, fcInfinity):
1828 case convolve(fcNaN, fcNaN):
1829 case convolve(fcZero, fcNaN):
1830 case convolve(fcNormal, fcNaN):
1831 case convolve(fcInfinity, fcNaN):
1832 return cmpUnordered;
1834 case convolve(fcInfinity, fcNormal):
1835 case convolve(fcInfinity, fcZero):
1836 case convolve(fcNormal, fcZero):
1840 return cmpGreaterThan;
1842 case convolve(fcNormal, fcInfinity):
1843 case convolve(fcZero, fcInfinity):
1844 case convolve(fcZero, fcNormal):
1846 return cmpGreaterThan;
1850 case convolve(fcInfinity, fcInfinity):
1851 if (sign == rhs.sign)
1856 return cmpGreaterThan;
1858 case convolve(fcZero, fcZero):
1861 case convolve(fcNormal, fcNormal):
1865 /* Two normal numbers. Do they have the same sign? */
1866 if (sign != rhs.sign) {
1868 result = cmpLessThan;
1870 result = cmpGreaterThan;
1872 /* Compare absolute values; invert result if negative. */
1873 result = compareAbsoluteValue(rhs);
1876 if (result == cmpLessThan)
1877 result = cmpGreaterThan;
1878 else if (result == cmpGreaterThan)
1879 result = cmpLessThan;
1886 /// APFloat::convert - convert a value of one floating point type to another.
1887 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1888 /// records whether the transformation lost information, i.e. whether
1889 /// converting the result back to the original type will produce the
1890 /// original value (this is almost the same as return value==opOK, but there
1891 /// are edge cases where this is not so).
1894 APFloat::convert(const fltSemantics &toSemantics,
1895 roundingMode rounding_mode, bool *losesInfo)
1897 lostFraction lostFraction;
1898 unsigned int newPartCount, oldPartCount;
1901 const fltSemantics &fromSemantics = *semantics;
1903 assertArithmeticOK(fromSemantics);
1904 assertArithmeticOK(toSemantics);
1905 lostFraction = lfExactlyZero;
1906 newPartCount = partCountForBits(toSemantics.precision + 1);
1907 oldPartCount = partCount();
1908 shift = toSemantics.precision - fromSemantics.precision;
1910 bool X86SpecialNan = false;
1911 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1912 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1913 (!(*significandParts() & 0x8000000000000000ULL) ||
1914 !(*significandParts() & 0x4000000000000000ULL))) {
1915 // x86 has some unusual NaNs which cannot be represented in any other
1916 // format; note them here.
1917 X86SpecialNan = true;
1920 // If this is a truncation, perform the shift before we narrow the storage.
1921 if (shift < 0 && (category==fcNormal || category==fcNaN))
1922 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1924 // Fix the storage so it can hold to new value.
1925 if (newPartCount > oldPartCount) {
1926 // The new type requires more storage; make it available.
1927 integerPart *newParts;
1928 newParts = new integerPart[newPartCount];
1929 APInt::tcSet(newParts, 0, newPartCount);
1930 if (category==fcNormal || category==fcNaN)
1931 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1933 significand.parts = newParts;
1934 } else if (newPartCount == 1 && oldPartCount != 1) {
1935 // Switch to built-in storage for a single part.
1936 integerPart newPart = 0;
1937 if (category==fcNormal || category==fcNaN)
1938 newPart = significandParts()[0];
1940 significand.part = newPart;
1943 // Now that we have the right storage, switch the semantics.
1944 semantics = &toSemantics;
1946 // If this is an extension, perform the shift now that the storage is
1948 if (shift > 0 && (category==fcNormal || category==fcNaN))
1949 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1951 if (category == fcNormal) {
1952 fs = normalize(rounding_mode, lostFraction);
1953 *losesInfo = (fs != opOK);
1954 } else if (category == fcNaN) {
1955 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1956 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1957 // does not give you back the same bits. This is dubious, and we
1958 // don't currently do it. You're really supposed to get
1959 // an invalid operation signal at runtime, but nobody does that.
1969 /* Convert a floating point number to an integer according to the
1970 rounding mode. If the rounded integer value is out of range this
1971 returns an invalid operation exception and the contents of the
1972 destination parts are unspecified. If the rounded value is in
1973 range but the floating point number is not the exact integer, the C
1974 standard doesn't require an inexact exception to be raised. IEEE
1975 854 does require it so we do that.
1977 Note that for conversions to integer type the C standard requires
1978 round-to-zero to always be used. */
1980 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1982 roundingMode rounding_mode,
1983 bool *isExact) const
1985 lostFraction lost_fraction;
1986 const integerPart *src;
1987 unsigned int dstPartsCount, truncatedBits;
1989 assertArithmeticOK(*semantics);
1993 /* Handle the three special cases first. */
1994 if (category == fcInfinity || category == fcNaN)
1997 dstPartsCount = partCountForBits(width);
1999 if (category == fcZero) {
2000 APInt::tcSet(parts, 0, dstPartsCount);
2001 // Negative zero can't be represented as an int.
2006 src = significandParts();
2008 /* Step 1: place our absolute value, with any fraction truncated, in
2011 /* Our absolute value is less than one; truncate everything. */
2012 APInt::tcSet(parts, 0, dstPartsCount);
2013 /* For exponent -1 the integer bit represents .5, look at that.
2014 For smaller exponents leftmost truncated bit is 0. */
2015 truncatedBits = semantics->precision -1U - exponent;
2017 /* We want the most significant (exponent + 1) bits; the rest are
2019 unsigned int bits = exponent + 1U;
2021 /* Hopelessly large in magnitude? */
2025 if (bits < semantics->precision) {
2026 /* We truncate (semantics->precision - bits) bits. */
2027 truncatedBits = semantics->precision - bits;
2028 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2030 /* We want at least as many bits as are available. */
2031 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2032 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2037 /* Step 2: work out any lost fraction, and increment the absolute
2038 value if we would round away from zero. */
2039 if (truncatedBits) {
2040 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2042 if (lost_fraction != lfExactlyZero &&
2043 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2044 if (APInt::tcIncrement(parts, dstPartsCount))
2045 return opInvalidOp; /* Overflow. */
2048 lost_fraction = lfExactlyZero;
2051 /* Step 3: check if we fit in the destination. */
2052 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2056 /* Negative numbers cannot be represented as unsigned. */
2060 /* It takes omsb bits to represent the unsigned integer value.
2061 We lose a bit for the sign, but care is needed as the
2062 maximally negative integer is a special case. */
2063 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2066 /* This case can happen because of rounding. */
2071 APInt::tcNegate (parts, dstPartsCount);
2073 if (omsb >= width + !isSigned)
2077 if (lost_fraction == lfExactlyZero) {
2084 /* Same as convertToSignExtendedInteger, except we provide
2085 deterministic values in case of an invalid operation exception,
2086 namely zero for NaNs and the minimal or maximal value respectively
2087 for underflow or overflow.
2088 The *isExact output tells whether the result is exact, in the sense
2089 that converting it back to the original floating point type produces
2090 the original value. This is almost equivalent to result==opOK,
2091 except for negative zeroes.
2094 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2096 roundingMode rounding_mode, bool *isExact) const
2100 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2103 if (fs == opInvalidOp) {
2104 unsigned int bits, dstPartsCount;
2106 dstPartsCount = partCountForBits(width);
2108 if (category == fcNaN)
2113 bits = width - isSigned;
2115 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2116 if (sign && isSigned)
2117 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2123 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2124 an APSInt, whose initial bit-width and signed-ness are used to determine the
2125 precision of the conversion.
2128 APFloat::convertToInteger(APSInt &result,
2129 roundingMode rounding_mode, bool *isExact) const
2131 unsigned bitWidth = result.getBitWidth();
2132 SmallVector<uint64_t, 4> parts(result.getNumWords());
2133 opStatus status = convertToInteger(
2134 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2135 // Keeps the original signed-ness.
2136 result = APInt(bitWidth, parts);
2140 /* Convert an unsigned integer SRC to a floating point number,
2141 rounding according to ROUNDING_MODE. The sign of the floating
2142 point number is not modified. */
2144 APFloat::convertFromUnsignedParts(const integerPart *src,
2145 unsigned int srcCount,
2146 roundingMode rounding_mode)
2148 unsigned int omsb, precision, dstCount;
2150 lostFraction lost_fraction;
2152 assertArithmeticOK(*semantics);
2153 category = fcNormal;
2154 omsb = APInt::tcMSB(src, srcCount) + 1;
2155 dst = significandParts();
2156 dstCount = partCount();
2157 precision = semantics->precision;
2159 /* We want the most significant PRECISION bits of SRC. There may not
2160 be that many; extract what we can. */
2161 if (precision <= omsb) {
2162 exponent = omsb - 1;
2163 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2165 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2167 exponent = precision - 1;
2168 lost_fraction = lfExactlyZero;
2169 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2172 return normalize(rounding_mode, lost_fraction);
2176 APFloat::convertFromAPInt(const APInt &Val,
2178 roundingMode rounding_mode)
2180 unsigned int partCount = Val.getNumWords();
2184 if (isSigned && api.isNegative()) {
2189 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2192 /* Convert a two's complement integer SRC to a floating point number,
2193 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2194 integer is signed, in which case it must be sign-extended. */
// SRC is an array of srcCount integerParts. The declarations of status and
// copy, the isSigned parameter, the first half of the if-condition and the
// final return are on lines not visible in this excerpt.
2196 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2197 unsigned int srcCount,
2199 roundingMode rounding_mode)
2203 assertArithmeticOK(*semantics);
// Tests the top bit of the most significant part, i.e. the sign bit of the
// sign-extended input.
2205 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2208 /* If we're signed and negative negate a copy. */
// src is const, so the magnitude is produced by negating a heap copy and
// handing that to the unsigned conversion.
// NOTE(review): copy comes from new[] and no delete[] is visible in this
// excerpt -- confirm it is released after the conversion. How the sign field
// is set on this path is likewise not visible here.
2210 copy = new integerPart[srcCount];
2211 APInt::tcAssign(copy, src, srcCount);
2212 APInt::tcNegate(copy, srcCount);
2213 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
// Non-negative (or unsigned) input: convert the caller's parts directly.
2217 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2223 /* FIXME: should this just take a const APInt reference? */
2225 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2226 unsigned int width, bool isSigned,
2227 roundingMode rounding_mode)
2229 unsigned int partCount = partCountForBits(width);
2230 APInt api = APInt(width, makeArrayRef(parts, partCount));
2233 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2238 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2242 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2244 lostFraction lost_fraction = lfExactlyZero;
2245 integerPart *significand;
2246 unsigned int bitPos, partsCount;
2247 StringRef::iterator dot, firstSignificantDigit;
2251 category = fcNormal;
2253 significand = significandParts();
2254 partsCount = partCount();
2255 bitPos = partsCount * integerPartWidth;
2257 /* Skip leading zeroes and any (hexa)decimal point. */
2258 StringRef::iterator begin = s.begin();
2259 StringRef::iterator end = s.end();
2260 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2261 firstSignificantDigit = p;
2264 integerPart hex_value;
2267 assert(dot == end && "String contains multiple dots");
2274 hex_value = hexDigitValue(*p);
2275 if (hex_value == -1U) {
2284 /* Store the number whilst 4-bit nibbles remain. */
2287 hex_value <<= bitPos % integerPartWidth;
2288 significand[bitPos / integerPartWidth] |= hex_value;
2290 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2291 while (p != end && hexDigitValue(*p) != -1U)
2298 /* Hex floats require an exponent but not a hexadecimal point. */
2299 assert(p != end && "Hex strings require an exponent");
2300 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2301 assert(p != begin && "Significand has no digits");
2302 assert((dot == end || p - begin != 1) && "Significand has no digits");
2304 /* Ignore the exponent if we are zero. */
2305 if (p != firstSignificantDigit) {
2308 /* Implicit hexadecimal point? */
2312 /* Calculate the exponent adjustment implicit in the number of
2313 significant digits. */
2314 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2315 if (expAdjustment < 0)
2317 expAdjustment = expAdjustment * 4 - 1;
2319 /* Adjust for writing the significand starting at the most
2320 significant nibble. */
2321 expAdjustment += semantics->precision;
2322 expAdjustment -= partsCount * integerPartWidth;
2324 /* Adjust for the given exponent. */
2325 exponent = totalExponent(p + 1, end, expAdjustment);
2328 return normalize(rounding_mode, lost_fraction);
2332 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2333 unsigned sigPartCount, int exp,
2334 roundingMode rounding_mode)
2336 unsigned int parts, pow5PartCount;
2337 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2338 integerPart pow5Parts[maxPowerOfFiveParts];
2341 isNearest = (rounding_mode == rmNearestTiesToEven ||
2342 rounding_mode == rmNearestTiesToAway);
2344 parts = partCountForBits(semantics->precision + 11);
2346 /* Calculate pow(5, abs(exp)). */
2347 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2349 for (;; parts *= 2) {
2350 opStatus sigStatus, powStatus;
2351 unsigned int excessPrecision, truncatedBits;
2353 calcSemantics.precision = parts * integerPartWidth - 1;
2354 excessPrecision = calcSemantics.precision - semantics->precision;
2355 truncatedBits = excessPrecision;
2357 APFloat decSig(calcSemantics, fcZero, sign);
2358 APFloat pow5(calcSemantics, fcZero, false);
2360 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2361 rmNearestTiesToEven);
2362 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2363 rmNearestTiesToEven);
2364 /* Add exp, as 10^n = 5^n * 2^n. */
2365 decSig.exponent += exp;
2367 lostFraction calcLostFraction;
2368 integerPart HUerr, HUdistance;
2369 unsigned int powHUerr;
2372 /* multiplySignificand leaves the precision-th bit set to 1. */
2373 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2374 powHUerr = powStatus != opOK;
2376 calcLostFraction = decSig.divideSignificand(pow5);
2377 /* Denormal numbers have less precision. */
2378 if (decSig.exponent < semantics->minExponent) {
2379 excessPrecision += (semantics->minExponent - decSig.exponent);
2380 truncatedBits = excessPrecision;
2381 if (excessPrecision > calcSemantics.precision)
2382 excessPrecision = calcSemantics.precision;
2384 /* Extra half-ulp lost in reciprocal of exponent. */
2385 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2388 /* Both multiplySignificand and divideSignificand return the
2389 result with the integer bit set. */
2390 assert(APInt::tcExtractBit
2391 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2393 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2395 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2396 excessPrecision, isNearest);
2398 /* Are we guaranteed to round correctly if we truncate? */
2399 if (HUdistance >= HUerr) {
2400 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2401 calcSemantics.precision - excessPrecision,
2403 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2404 above we must adjust our exponent to compensate for the
2405 implicit right shift. */
2406 exponent = (decSig.exponent + semantics->precision
2407 - (calcSemantics.precision - excessPrecision));
2408 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2411 return normalize(rounding_mode, calcLostFraction);
2417 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2422 /* Scan the text. */
2423 StringRef::iterator p = str.begin();
2424 interpretDecimal(p, str.end(), &D);
2426 /* Handle the quick cases. First the case of no significant digits,
2427 i.e. zero, and then exponents that are obviously too large or too
2428 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2429 definitely overflows if
2431 (exp - 1) * L >= maxExponent
2433 and definitely underflows to zero where
2435 (exp + 1) * L <= minExponent - precision
2437 With integer arithmetic the tightest bounds for L are
2439 93/28 < L < 196/59 [ numerator <= 256 ]
2440 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2443 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2447 /* Check whether the normalized exponent is high enough to overflow
2448 max during the log-rebasing in the max-exponent check below. */
2449 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2450 fs = handleOverflow(rounding_mode);
2452 /* If it wasn't, then it also wasn't high enough to overflow max
2453 during the log-rebasing in the min-exponent check. Check that it
2454 won't overflow min in either check, then perform the min-exponent
2456 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2457 (D.normalizedExponent + 1) * 28738 <=
2458 8651 * (semantics->minExponent - (int) semantics->precision)) {
2459 /* Underflow to zero and round. */
2461 fs = normalize(rounding_mode, lfLessThanHalf);
2463 /* We can finally safely perform the max-exponent check. */
2464 } else if ((D.normalizedExponent - 1) * 42039
2465 >= 12655 * semantics->maxExponent) {
2466 /* Overflow and round. */
2467 fs = handleOverflow(rounding_mode);
2469 integerPart *decSignificand;
2470 unsigned int partCount;
2472 /* A tight upper bound on number of bits required to hold an
2473 N-digit decimal integer is N * 196 / 59. Allocate enough space
2474 to hold the full significand, and an extra part required by
2476 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2477 partCount = partCountForBits(1 + 196 * partCount / 59);
2478 decSignificand = new integerPart[partCount + 1];
2481 /* Convert to binary efficiently - we do almost all multiplication
2482 in an integerPart. Only when this would overflow do we do a single
2483 bignum multiplication, and then revert again to multiplication
2484 in an integerPart. */
2486 integerPart decValue, val, multiplier;
2494 if (p == str.end()) {
2498 decValue = decDigitValue(*p++);
2499 assert(decValue < 10U && "Invalid character in significand");
2501 val = val * 10 + decValue;
2502 /* The maximum number that can be multiplied by ten with any
2503 digit added without overflowing an integerPart. */
2504 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2506 /* Multiply out the current part. */
2507 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2508 partCount, partCount + 1, false);
2510 /* If we used another part (likely but not guaranteed), increase
2512 if (decSignificand[partCount])
2514 } while (p <= D.lastSigDigit);
2516 category = fcNormal;
2517 fs = roundSignificandWithExponent(decSignificand, partCount,
2518 D.exponent, rounding_mode);
2520 delete [] decSignificand;
// Entry point for parsing text into this value: records the sign from an
// optional leading + or -, then hands the remaining characters to the
// hexadecimal parser when they start with 0x or 0X, and to the decimal parser
// otherwise. The return type, braces and the statements inside the sign
// branch are on lines not visible in this excerpt.
2527 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2529 assertArithmeticOK(*semantics);
// An empty string is a caller error; it is checked by assertion only.
2530 assert(!str.empty() && "Invalid string length");
2532 /* Handle a leading minus sign. */
2533 StringRef::iterator p = str.begin();
2534 size_t slen = str.size();
// sign is 1 only for a leading minus; a leading plus is accepted as well and
// leaves sign at 0.
2535 sign = *p == '-' ? 1 : 0;
2536 if (*p == '-' || *p == '+') {
// Presumably p and slen have been advanced past the sign character on the
// lines not visible here, so slen now counts what follows the sign -- confirm.
2539 assert(slen && "String has no digits");
// A 0x / 0X prefix selects the hexadecimal parser; the two prefix characters
// are stripped from the StringRef passed on.
2542 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2543 assert(slen - 2 && "Invalid string");
2544 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
// Everything else is treated as a decimal literal.
2548 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2551 /* Write out a hexadecimal representation of the floating point value
2552 to DST, which must be of sufficient size, in the C99 form
2553 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2554 excluding the terminating NUL.
2556 If UPPERCASE, the output is in upper case, otherwise in lower case.
2558 HEXDIGITS digits appear altogether, rounding the value if
2559 necessary. If HEXDIGITS is 0, the minimal precision to display the
2560 number precisely is used instead. If nothing would appear after
2561 the decimal point it is suppressed.
2563 The decimal exponent is always printed and has at least one digit.
2564 Zero values display an exponent of zero. Infinities and NaNs
2565 appear as "infinity" or "nan" respectively.
2567 The above rules are as specified by C99. There is ambiguity about
2568 what the leading hexadecimal digit should be. This implementation
2569 uses whatever is necessary so that the exponent is displayed as
2570 stored. This implies the exponent will fall within the IEEE format
2571 range, and the leading hexadecimal digit will be 0 (for denormals),
2572 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2573 any other digits zero).
2576 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2577 bool upperCase, roundingMode rounding_mode) const
2581 assertArithmeticOK(*semantics);
2589 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2590 dst += sizeof infinityL - 1;
2594 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2595 dst += sizeof NaNU - 1;
2600 *dst++ = upperCase ? 'X': 'x';
2602 if (hexDigits > 1) {
2604 memset (dst, '0', hexDigits - 1);
2605 dst += hexDigits - 1;
2607 *dst++ = upperCase ? 'P': 'p';
2612 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2618 return static_cast<unsigned int>(dst - p);
2621 /* Does the hard work of outputting the correctly rounded hexadecimal
2622 form of a normal floating point number with the specified number of
2623 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2624 digits necessary to print the value precisely is output. */
2626 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2628 roundingMode rounding_mode) const
// Emits the hexadecimal digits of the significand followed by the exponent
// marker and the exponent, starting at dst, and returns whatever
// writeSignedDecimal returns for the exponent. hexDigits is the requested
// digit count, upperCase selects the letter case, and rounding_mode decides
// how dropped bits are rounded.
2630 unsigned int count, valueBits, shift, partsCount, outputDigits;
2631 const char *hexDigitChars;
2632 const integerPart *significand;
// Radix marker in the requested case.
2637 *dst++ = upperCase ? 'X': 'x';
// Digit table matching the requested case.
2640 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2642 significand = significandParts();
2643 partsCount = partCount();
2645 /* +3 because the first digit only uses the single integer bit, so
2646 we have 3 virtual zero most-significant-bits. */
2647 valueBits = semantics->precision + 3;
// Left shift that aligns the top of valueBits with the top of a part.
2648 shift = integerPartWidth - valueBits % integerPartWidth;
2650 /* The natural number of digits required ignoring trailing
2651 insignificant zeroes. */
2652 outputDigits = (valueBits - significandLSB () + 3) / 4;
2654 /* hexDigits of zero means use the required number for the
2655 precision. Otherwise, see if we are truncating. If we are,
2656 find out if we need to round away from zero. */
2658 if (hexDigits < outputDigits) {
2659 /* We are dropping non-zero bits, so need to check how to round.
2660 "bits" is the number of dropped bits. */
2662 lostFraction fraction;
// Each hex digit kept accounts for four bits; the rest are dropped.
2664 bits = valueBits - hexDigits * 4;
2665 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2666 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2668 outputDigits = hexDigits;
2671 /* Write the digits consecutively, and start writing in the location
2672 of the hexadecimal point. We move the most significant digit
2673 left and add the hexadecimal point later. */
// Number of integerPart-sized chunks needed to cover valueBits, rounded up.
2676 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2678 while (outputDigits && count) {
2681 /* Put the most significant integerPartWidth bits in "part". */
2682 if (--count == partsCount)
2683 part = 0; /* An imaginary higher zero part. */
2685 part = significand[count] << shift;
2688 part |= significand[count - 1] >> (integerPartWidth - shift);
2690 /* Convert as much of "part" to hexdigits as we can. */
2691 unsigned int curDigits = integerPartWidth / 4;
2693 if (curDigits > outputDigits)
2694 curDigits = outputDigits;
2695 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2696 outputDigits -= curDigits;
2702 /* Note that hexDigitChars has a trailing '0'. */
// Rounding step: replace the digit at q with the next entry of the digit
// table, and keep going while the replacement is '0', i.e. while the
// increment carried out of that digit.
2705 *q = hexDigitChars[hexDigitValue (*q) + 1];
2706 } while (*q == '0');
2709 /* Add trailing zeroes. */
2710 memset (dst, '0', outputDigits);
2711 dst += outputDigits;
2714 /* Move the most significant digit to before the point, and if there
2715 is something after the decimal point add it. This must come
2716 after rounding above. */
2723 /* Finally output the exponent. */
2724 *dst++ = upperCase ? 'P': 'p';
2726 return writeSignedDecimal (dst, exponent);
// Hashes an APFloat. Values whose category is not fcNormal are hashed from
// the category, the sign (forced to zero for NaN) and the precision of the
// semantics. fcNormal values additionally contribute their exponent and the
// significand parts in [significandParts(), significandParts() + partCount()).
2729 hash_code llvm::hash_value(const APFloat &Arg) {
2730 if (Arg.category != APFloat::fcNormal)
2731 return hash_combine((uint8_t)Arg.category,
2732 // NaN has no sign, fix it at zero.
2733 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2734 Arg.semantics->precision);
2736 // Normal floats need their exponent and significand hashed.
2737 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2738 Arg.semantics->precision, Arg.exponent,
2740 Arg.significandParts(),
2741 Arg.significandParts() + Arg.partCount()));
2744 // Conversion from APFloat to/from host float/double. It may eventually be
2745 // possible to eliminate these and have everybody deal with APFloats, but that
2746 // will take a while. This approach will not easily extend to long double.
2747 // Current implementation requires integerPartWidth==64, which is correct at
2748 // the moment but could be made more general.
2750 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2751 // the actual IEEE representations. We compensate for that here.
2754 APFloat::convertF80LongDoubleAPFloatToAPInt() const
// Packs this x87DoubleExtended value into an 80-bit APInt. words[0] carries
// the 64-bit significand; words[1] carries the sign at bit 15 above the
// 15-bit biased exponent.
2756 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2757 assert(partCount()==2);
2759 uint64_t myexponent, mysignificand;
2761 if (category==fcNormal) {
2762 myexponent = exponent+16383; //bias
2763 mysignificand = significandParts()[0];
// A biased exponent of 1 with bit 63 of the significand clear is stored
// with an exponent field of 0.
2764 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2765 myexponent = 0; // denormal
2766 } else if (category==fcZero) {
2769 } else if (category==fcInfinity) {
// Infinity: all-ones exponent field and only bit 63 set in the significand.
2770 myexponent = 0x7fff;
2771 mysignificand = 0x8000000000000000ULL;
// NaN: all-ones exponent field, significand taken from part 0 unchanged.
2773 assert(category == fcNaN && "Unknown category");
2774 myexponent = 0x7fff;
2775 mysignificand = significandParts()[0];
2779 words[0] = mysignificand;
2780 words[1] = ((uint64_t)(sign & 1) << 15) |
2781 (myexponent & 0x7fffLL);
2782 return APInt(80, words);
2786 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
// Packs this PPCDoubleDouble value into a 128-bit APInt made of two 64-bit
// words. Each word has a sign at bit 63, an 11-bit exponent field at bits
// 52..62 and the low 52 significand bits. words[0] is built from
// sign/exponent/significand part 0, words[1] from sign2/exponent2/part 1.
2788 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2789 assert(partCount()==2);
2791 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2793 if (category==fcNormal) {
2794 myexponent = exponent + 1023; //bias
2795 myexponent2 = exponent2 + 1023;
2796 mysignificand = significandParts()[0];
2797 mysignificand2 = significandParts()[1];
// For either half, a biased exponent of 1 with bit 52 (the integer bit)
// clear is stored with an exponent field of 0.
2798 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2799 myexponent = 0; // denormal
2800 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2801 myexponent2 = 0; // denormal
2802 } else if (category==fcZero) {
2807 } else if (category==fcInfinity) {
// NaN: significands come from the stored parts, and exponent2 is passed
// through without adding the bias.
2813 assert(category == fcNaN && "Unknown category");
2815 mysignificand = significandParts()[0];
2816 myexponent2 = exponent2;
2817 mysignificand2 = significandParts()[1];
2821 words[0] = ((uint64_t)(sign & 1) << 63) |
2822 ((myexponent & 0x7ff) << 52) |
2823 (mysignificand & 0xfffffffffffffLL);
2824 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2825 ((myexponent2 & 0x7ff) << 52) |
2826 (mysignificand2 & 0xfffffffffffffLL);
2827 return APInt(128, words);
2831 APFloat::convertQuadrupleAPFloatToAPInt() const
// Packs this IEEEquad value into a 128-bit APInt. words[0] is significand
// part 0; words[1] holds the sign at bit 63, the 15-bit exponent field at
// bits 48..62 and the low 48 bits of significand part 1.
2833 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2834 assert(partCount()==2);
2836 uint64_t myexponent, mysignificand, mysignificand2;
2838 if (category==fcNormal) {
2839 myexponent = exponent+16383; //bias
2840 mysignificand = significandParts()[0];
2841 mysignificand2 = significandParts()[1];
// A biased exponent of 1 with bit 48 of the upper part (the integer bit)
// clear is stored with an exponent field of 0.
2842 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2843 myexponent = 0; // denormal
2844 } else if (category==fcZero) {
2846 mysignificand = mysignificand2 = 0;
2847 } else if (category==fcInfinity) {
// Infinity: all-ones exponent field with a zero significand.
2848 myexponent = 0x7fff;
2849 mysignificand = mysignificand2 = 0;
// NaN: all-ones exponent field, significand parts copied as stored.
2851 assert(category == fcNaN && "Unknown category!");
2852 myexponent = 0x7fff;
2853 mysignificand = significandParts()[0];
2854 mysignificand2 = significandParts()[1];
2858 words[0] = mysignificand;
2859 words[1] = ((uint64_t)(sign & 1) << 63) |
2860 ((myexponent & 0x7fff) << 48) |
2861 (mysignificand2 & 0xffffffffffffLL);
2863 return APInt(128, words);
2867 APFloat::convertDoubleAPFloatToAPInt() const
// Packs this IEEEdouble value into a 64-bit APInt: sign at bit 63, an
// 11-bit exponent field at bits 52..62 and the low 52 significand bits.
2869 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2870 assert(partCount()==1);
2872 uint64_t myexponent, mysignificand;
2874 if (category==fcNormal) {
2875 myexponent = exponent+1023; //bias
2876 mysignificand = *significandParts();
// A biased exponent of 1 with bit 52 (the integer bit) clear is stored
// with an exponent field of 0.
2877 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2878 myexponent = 0; // denormal
2879 } else if (category==fcZero) {
2882 } else if (category==fcInfinity) {
// NaN: the significand is taken from the stored part unchanged.
2886 assert(category == fcNaN && "Unknown category!");
2888 mysignificand = *significandParts();
2891 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2892 ((myexponent & 0x7ff) << 52) |
2893 (mysignificand & 0xfffffffffffffLL))));
2897 APFloat::convertFloatAPFloatToAPInt() const
// Packs this IEEEsingle value into a 32-bit APInt: sign at bit 31, an 8-bit
// exponent field at bits 23..30 and the low 23 significand bits.
2899 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2900 assert(partCount()==1);
2902 uint32_t myexponent, mysignificand;
2904 if (category==fcNormal) {
2905 myexponent = exponent+127; //bias
2906 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with bit 23 (the integer bit) clear is stored
// with an exponent field of 0.
2907 if (myexponent == 1 && !(mysignificand & 0x800000))
2908 myexponent = 0; // denormal
2909 } else if (category==fcZero) {
2912 } else if (category==fcInfinity) {
// NaN: the significand is taken from the stored part, narrowed to 32 bits.
2916 assert(category == fcNaN && "Unknown category!");
2918 mysignificand = (uint32_t)*significandParts();
2921 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2922 (mysignificand & 0x7fffff)));
2926 APFloat::convertHalfAPFloatToAPInt() const
// Packs this IEEEhalf value into a 16-bit APInt: sign at bit 15, a 5-bit
// exponent field at bits 10..14 and the low 10 significand bits.
2928 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2929 assert(partCount()==1);
2931 uint32_t myexponent, mysignificand;
2933 if (category==fcNormal) {
2934 myexponent = exponent+15; //bias
2935 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with bit 10 (the integer bit) clear is stored
// with an exponent field of 0.
2936 if (myexponent == 1 && !(mysignificand & 0x400))
2937 myexponent = 0; // denormal
2938 } else if (category==fcZero) {
2941 } else if (category==fcInfinity) {
// NaN: the significand is taken from the stored part, narrowed to 32 bits.
2945 assert(category == fcNaN && "Unknown category!");
2947 mysignificand = (uint32_t)*significandParts();
2950 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2951 (mysignificand & 0x3ff)));
2954 // This function creates an APInt that is just a bit map of the floating
2955 // point constant as it would appear in memory. It is not a conversion,
2956 // and treating the result as a normal integer is unlikely to be useful.
2959 APFloat::bitcastToAPInt() const
// Dispatches on the identity of the semantics object to the matching
// convert*APFloatToAPInt routine. The x87DoubleExtended routine is the
// final fallback, guarded by an assertion on the semantics.
2961 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2962 return convertHalfAPFloatToAPInt();
2964 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2965 return convertFloatAPFloatToAPInt();
2967 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2968 return convertDoubleAPFloatToAPInt();
2970 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2971 return convertQuadrupleAPFloatToAPInt();
2973 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2974 return convertPPCDoubleDoubleAPFloatToAPInt();
2976 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2978 return convertF80LongDoubleAPFloatToAPInt();
2982 APFloat::convertToFloat() const
// Returns this value as a host float by reinterpreting its bit pattern.
// Asserts that the semantics are IEEEsingle.
2984 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2985 "Float semantics are not IEEEsingle");
2986 APInt api = bitcastToAPInt();
2987 return api.bitsToFloat();
2991 APFloat::convertToDouble() const
// Returns this value as a host double by reinterpreting its bit pattern.
// Asserts that the semantics are IEEEdouble.
2993 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2994 "Float semantics are not IEEEdouble");
2995 APInt api = bitcastToAPInt();
2996 return api.bitsToDouble();
2999 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3000 /// does not support these bit patterns:
3001 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3002 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3003 /// exponent = 0, integer bit 1 ("pseudodenormal")
3004 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3005 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3007 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
// Initializes this object with x87DoubleExtended semantics from an 80-bit
// APInt. Raw word 0 is the 64-bit significand, the low 15 bits of raw word 1
// are the biased exponent, and the bits above them supply the sign.
3009 assert(api.getBitWidth()==80);
3010 uint64_t i1 = api.getRawData()[0];
3011 uint64_t i2 = api.getRawData()[1];
3012 uint64_t myexponent = (i2 & 0x7fff);
3013 uint64_t mysignificand = i1;
3015 initialize(&APFloat::x87DoubleExtended);
3016 assert(partCount()==2);
3018 sign = static_cast<unsigned int>(i2>>15);
// Classification order: all-zero fields, then all-ones exponent with exactly
// the integer bit set (infinity), then all-ones exponent with any other
// significand, then everything else (normal).
3019 if (myexponent==0 && mysignificand==0) {
3020 // exponent, significand meaningless
3022 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3023 // exponent, significand meaningless
3024 category = fcInfinity;
3025 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3026 // exponent meaningless
3028 significandParts()[0] = mysignificand;
3029 significandParts()[1] = 0;
3031 category = fcNormal;
// Remove the bias; the significand occupies part 0 and part 1 is cleared.
3032 exponent = myexponent - 16383;
3033 significandParts()[0] = mysignificand;
3034 significandParts()[1] = 0;
3035 if (myexponent==0) // denormal
3041 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
// Initializes this object with PPCDoubleDouble semantics from a 128-bit
// APInt. Each 64-bit raw word is split into a sign (bit 63), an 11-bit
// exponent field (bits 52..62) and a 52-bit significand field. Word 0 feeds
// sign/exponent/part 0 and word 1 feeds sign2/exponent2/part 1. The category
// is decided from the first word only.
3043 assert(api.getBitWidth()==128);
3044 uint64_t i1 = api.getRawData()[0];
3045 uint64_t i2 = api.getRawData()[1];
3046 uint64_t myexponent = (i1 >> 52) & 0x7ff;
3047 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
3048 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
3049 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
3051 initialize(&APFloat::PPCDoubleDouble);
3052 assert(partCount()==2);
3054 sign = static_cast<unsigned int>(i1>>63);
3055 sign2 = static_cast<unsigned int>(i2>>63);
3056 if (myexponent==0 && mysignificand==0) {
3057 // exponent, significand meaningless
3058 // exponent2 and significand2 are required to be 0; we don't check
3060 } else if (myexponent==0x7ff && mysignificand==0) {
3061 // exponent, significand meaningless
3062 // exponent2 and significand2 are required to be 0; we don't check
3063 category = fcInfinity;
3064 } else if (myexponent==0x7ff && mysignificand!=0) {
3065 // exponent meaningless. So is the whole second word, but keep it
// The second word's exponent field is stored as-is (no bias removed).
3068 exponent2 = myexponent2;
3069 significandParts()[0] = mysignificand;
3070 significandParts()[1] = mysignificand2;
3072 category = fcNormal;
3073 // Note there is no category2; the second word is treated as if it is
3074 // fcNormal, although it might be something else considered by itself.
3075 exponent = myexponent - 1023;
3076 exponent2 = myexponent2 - 1023;
3077 significandParts()[0] = mysignificand;
3078 significandParts()[1] = mysignificand2;
3079 if (myexponent==0) // denormal
3082 significandParts()[0] |= 0x10000000000000LL; // integer bit
3086 significandParts()[1] |= 0x10000000000000LL; // integer bit
3091 APFloat::initFromQuadrupleAPInt(const APInt &api)
// Initializes this object with IEEEquad semantics from a 128-bit APInt.
// Raw word 0 is the low 64 significand bits; raw word 1 holds the sign at
// bit 63, the 15-bit exponent field at bits 48..62 and the upper 48
// significand bits.
3093 assert(api.getBitWidth()==128);
3094 uint64_t i1 = api.getRawData()[0];
3095 uint64_t i2 = api.getRawData()[1];
3096 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3097 uint64_t mysignificand = i1;
3098 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3100 initialize(&APFloat::IEEEquad);
3101 assert(partCount()==2);
3103 sign = static_cast<unsigned int>(i2>>63);
3104 if (myexponent==0 &&
3105 (mysignificand==0 && mysignificand2==0)) {
3106 // exponent, significand meaningless
3108 } else if (myexponent==0x7fff &&
3109 (mysignificand==0 && mysignificand2==0)) {
3110 // exponent, significand meaningless
3111 category = fcInfinity;
3112 } else if (myexponent==0x7fff &&
3113 (mysignificand!=0 || mysignificand2 !=0)) {
3114 // exponent meaningless
3116 significandParts()[0] = mysignificand;
3117 significandParts()[1] = mysignificand2;
3119 category = fcNormal;
// Remove the bias and copy both significand words.
3120 exponent = myexponent - 16383;
3121 significandParts()[0] = mysignificand;
3122 significandParts()[1] = mysignificand2;
3123 if (myexponent==0) // denormal
3126 significandParts()[1] |= 0x1000000000000LL; // integer bit
3131 APFloat::initFromDoubleAPInt(const APInt &api)
// Initializes this object with IEEEdouble semantics from a 64-bit APInt:
// sign at bit 63, 11-bit exponent field at bits 52..62, 52-bit significand
// field in the low bits.
3133 assert(api.getBitWidth()==64);
3134 uint64_t i = *api.getRawData();
3135 uint64_t myexponent = (i >> 52) & 0x7ff;
3136 uint64_t mysignificand = i & 0xfffffffffffffLL;
3138 initialize(&APFloat::IEEEdouble);
3139 assert(partCount()==1);
3141 sign = static_cast<unsigned int>(i>>63);
3142 if (myexponent==0 && mysignificand==0) {
3143 // exponent, significand meaningless
3145 } else if (myexponent==0x7ff && mysignificand==0) {
3146 // exponent, significand meaningless
3147 category = fcInfinity;
3148 } else if (myexponent==0x7ff && mysignificand!=0) {
3149 // exponent meaningless
3151 *significandParts() = mysignificand;
3153 category = fcNormal;
// Remove the bias and store the fraction bits.
3154 exponent = myexponent - 1023;
3155 *significandParts() = mysignificand;
3156 if (myexponent==0) // denormal
3159 *significandParts() |= 0x10000000000000LL; // integer bit
3164 APFloat::initFromFloatAPInt(const APInt & api)
// Initializes this object with IEEEsingle semantics from a 32-bit APInt:
// 8-bit exponent field at bits 23..30 and 23-bit significand field in the
// low bits.
3166 assert(api.getBitWidth()==32);
3167 uint32_t i = (uint32_t)*api.getRawData();
3168 uint32_t myexponent = (i >> 23) & 0xff;
3169 uint32_t mysignificand = i & 0x7fffff;
3171 initialize(&APFloat::IEEEsingle);
3172 assert(partCount()==1);
3175 if (myexponent==0 && mysignificand==0) {
3176 // exponent, significand meaningless
3178 } else if (myexponent==0xff && mysignificand==0) {
3179 // exponent, significand meaningless
3180 category = fcInfinity;
3181 } else if (myexponent==0xff && mysignificand!=0) {
3182 // exponent meaningless; the significand bits are stored below
3184 *significandParts() = mysignificand;
3186 category = fcNormal;
3187 exponent = myexponent - 127; //bias
3188 *significandParts() = mysignificand;
3189 if (myexponent==0) // denormal
3192 *significandParts() |= 0x800000; // integer bit
3197 APFloat::initFromHalfAPInt(const APInt & api)
// Initializes this object with IEEEhalf semantics from a 16-bit APInt:
// 5-bit exponent field at bits 10..14 and 10-bit significand field in the
// low bits.
3199 assert(api.getBitWidth()==16);
3200 uint32_t i = (uint32_t)*api.getRawData();
3201 uint32_t myexponent = (i >> 10) & 0x1f;
3202 uint32_t mysignificand = i & 0x3ff;
3204 initialize(&APFloat::IEEEhalf);
3205 assert(partCount()==1);
3208 if (myexponent==0 && mysignificand==0) {
3209 // exponent, significand meaningless
3211 } else if (myexponent==0x1f && mysignificand==0) {
3212 // exponent, significand meaningless
3213 category = fcInfinity;
3214 } else if (myexponent==0x1f && mysignificand!=0) {
3215 // exponent meaningless; the significand bits are stored below
3217 *significandParts() = mysignificand;
3219 category = fcNormal;
3220 exponent = myexponent - 15; //bias
3221 *significandParts() = mysignificand;
3222 if (myexponent==0) // denormal
3225 *significandParts() |= 0x400; // integer bit
3229 /// Treat api as containing the bits of a floating point number. Currently
3230 /// we infer the floating point type from the size of the APInt. The
3231 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3232 /// when the size is anything else).
3234 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
// Selects the initializer by the bit width of api: 16 -> half, 32 -> float,
// 64 -> double, 80 -> x87 extended, 128 -> quad or PPC double-double
// (chosen by a conditional between the two initializers). Any other width
// reaches llvm_unreachable.
3236 if (api.getBitWidth() == 16)
3237 return initFromHalfAPInt(api);
3238 else if (api.getBitWidth() == 32)
3239 return initFromFloatAPInt(api);
3240 else if (api.getBitWidth()==64)
3241 return initFromDoubleAPInt(api);
3242 else if (api.getBitWidth()==80)
3243 return initFromF80LongDoubleAPInt(api);
3244 else if (api.getBitWidth()==128)
3246 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3248 llvm_unreachable(0);
3252 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
// Builds an APFloat from a BitWidth-wide APInt with every bit set; isIEEE is
// forwarded to the APInt-based constructor.
3254 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
// Constructs an fcNormal value for Sem with the given sign, the exponent
// set to Sem.maxExponent and every significand bit within the precision set.
3257 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3258 APFloat Val(Sem, fcNormal, Negative);
3260 // We want (in interchange format):
3261 // sign = {Negative}
3263 // significand = 1..1
3265 Val.exponent = Sem.maxExponent; // unbiased
3267 // 1-initialize all bits....
3268 Val.zeroSignificand();
3269 integerPart *significand = Val.significandParts();
3270 unsigned N = partCountForBits(Sem.precision);
3271 for (unsigned i = 0; i != N; ++i)
3272 significand[i] = ~((integerPart) 0);
3274 // ...and then clear the top bits for internal consistency.
// The mask below keeps only the low (precision % integerPartWidth) bits; it
// is needed only when the precision is not a whole number of parts.
3275 if (Sem.precision % integerPartWidth != 0)
3277 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
// Constructs an fcNormal value for Sem with the given sign, the exponent
// set to Sem.minExponent and a significand whose only set bit is bit 0 of
// part 0.
3282 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3283 APFloat Val(Sem, fcNormal, Negative);
3285 // We want (in interchange format):
3286 // sign = {Negative}
3288 // significand = 0..01
3290 Val.exponent = Sem.minExponent; // unbiased
3291 Val.zeroSignificand();
3292 Val.significandParts()[0] = 1;
// Constructs an fcNormal value for Sem with the given sign, the exponent
// set to Sem.minExponent and only the top bit of the precision (bit
// precision - 1) set in the significand.
3296 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3297 APFloat Val(Sem, fcNormal, Negative);
3299 // We want (in interchange format):
3300 // sign = {Negative}
3302 // significand = 10..0
3304 Val.exponent = Sem.minExponent;
3305 Val.zeroSignificand();
// Bit (precision - 1) lives in the last part needed for the precision, at
// offset (precision - 1) % integerPartWidth within that part.
3306 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3307 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
// Constructs an APFloat from a raw bit pattern; exponent2 and sign2 start
// at zero and the rest is filled in by initFromAPInt.
3312 APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
3313 initFromAPInt(api, isIEEE);
// Constructs an APFloat from a host float via its bit pattern
// (APInt::floatToBits), with exponent2 and sign2 zeroed.
3316 APFloat::APFloat(float f) : exponent2(0), sign2(0) {
3317 initFromAPInt(APInt::floatToBits(f));
// Constructs an APFloat from a host double via its bit pattern
// (APInt::doubleToBits), with exponent2 and sign2 zeroed.
3320 APFloat::APFloat(double d) : exponent2(0), sign2(0) {
3321 initFromAPInt(APInt::doubleToBits(d));
// Appends every character of Str to the end of Buffer.
3325 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3326 Buffer.append(Str.begin(), Str.end());
3329 /// Removes data from the given significand until it is no more
3330 /// precise than is required for the desired precision.
// APInt overload. significand and exp are updated in place: when the
// significand has more active bits than FormatPrecision decimal digits
// need, exp is raised by tensRemovable and the significand is divided by
// `divisor`, then truncated to its active width (never below 32 bits).
3331 void AdjustToPrecision(APInt &significand,
3332 int &exp, unsigned FormatPrecision) {
3333 unsigned bits = significand.getActiveBits();
3335 // 196/59 is a very slight overestimate of lg_2(10).
3336 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
// Nothing to do when the value already fits in the required bit count.
3338 if (bits <= bitsRequired) return;
// Convert the surplus bits back into a whole number of decimal digits.
3340 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3341 if (!tensRemovable) return;
3343 exp += tensRemovable;
// The loop below walks the bits of tensRemovable from least significant
// upward; presumably it accumulates 10^tensRemovable into divisor by
// repeated squaring of powten.
3345 APInt divisor(significand.getBitWidth(), 1);
3346 APInt powten(significand.getBitWidth(), 10);
3348 if (tensRemovable & 1)
3350 tensRemovable >>= 1;
3351 if (!tensRemovable) break;
3355 significand = significand.udiv(divisor);
3357 // Truncate the significand down to its active bit count, but
3358 // don't try to drop below 32.
3359 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3360 significand = significand.trunc(newPrecision);
// Digit-buffer overload. buffer holds decimal digit characters with the
// least significant digit first. When it holds more than FormatPrecision
// digits, the excess low-order digits are erased from the front and exp is
// increased by the number of digits removed. The first dropped digit
// decides between truncation (< '5') and rounding up.
3364 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3365 int &exp, unsigned FormatPrecision) {
3366 unsigned N = buffer.size();
3367 if (N <= FormatPrecision) return;
3369 // The most significant figures are the last ones in the buffer.
3370 unsigned FirstSignificant = N - FormatPrecision;
3373 // FIXME: this probably shouldn't use 'round half up'.
3375 // Rounding down is just a truncation, except we also want to drop
3376 // trailing zeros from the new result.
3377 if (buffer[FirstSignificant - 1] < '5') {
3378 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3381 exp += FirstSignificant;
3382 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3386 // Rounding up requires a decimal add-with-carry. If we continue
3387 // the carry, the newly-introduced zeros will just be truncated.
3388 for (unsigned I = FirstSignificant; I != N; ++I) {
3389 if (buffer[I] == '9') {
3397 // If we carried through, we have exactly one digit of precision.
3398 if (FirstSignificant == N) {
3399 exp += FirstSignificant;
3401 buffer.push_back('1');
// Otherwise drop the discarded low-order digits and account for them in exp.
3405 exp += FirstSignificant;
3406 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
// Appends a decimal rendering of this value to Str.
//   FormatPrecision  - number of significant decimal digits to keep; zero
//                      selects a default computed from the precision of the
//                      semantics.
//   FormatMaxPadding - limit compared against the exponent magnitude when
//                      choosing between plain and scientific notation; zero
//                      forces scientific notation.
// Infinities are written as "-Inf"/"+Inf" and NaN as "NaN".
3410 void APFloat::toString(SmallVectorImpl<char> &Str,
3411 unsigned FormatPrecision,
3412 unsigned FormatMaxPadding) const {
3416 return append(Str, "-Inf");
3418 return append(Str, "+Inf");
3420 case fcNaN: return append(Str, "NaN");
// With no padding allowed the fixed text "0.0E+0" is used (presumably the
// zero case).
3426 if (!FormatMaxPadding)
3427 append(Str, "0.0E+0");
3439 // Decompose the number into an APInt and an exponent.
3440 int exp = exponent - ((int) semantics->precision - 1);
3441 APInt significand(semantics->precision,
3442 makeArrayRef(significandParts(),
3443 partCountForBits(semantics->precision)));
3445 // Set FormatPrecision if zero. We want to do this before we
3446 // truncate trailing zeros, as those are part of the precision.
3447 if (!FormatPrecision) {
3448 // It's an interesting question whether to use the nominal
3449 // precision or the active precision here for denormals.
3451 // FormatPrecision = ceil(significandBits / lg_2(10))
3452 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3455 // Ignore trailing binary zeros.
3456 int trailingZeros = significand.countTrailingZeros();
3457 exp += trailingZeros;
3458 significand = significand.lshr(trailingZeros);
3460 // Change the exponent from 2^e to 10^e.
3463 } else if (exp > 0) {
// Positive binary exponent: widen by exp bits and shift the value left.
3465 significand = significand.zext(semantics->precision + exp);
3466 significand <<= exp;
3468 } else { /* exp < 0 */
3471 // We transform this using the identity:
3472 // (N)(2^-e) == (N)(5^e)(10^-e)
3473 // This means we have to multiply N (the significand) by 5^e.
3474 // To avoid overflow, we have to operate on numbers large
3475 // enough to store N * 5^e:
3476 // log2(N * 5^e) == log2(N) + e * log2(5)
3477 // <= semantics->precision + e * 137 / 59
3478 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3480 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3482 // Multiply significand by 5^e.
3483 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3484 significand = significand.zext(precision);
3485 APInt five_to_the_i(precision, 5);
3487 if (texp & 1) significand *= five_to_the_i;
3491 five_to_the_i *= five_to_the_i;
3495 AdjustToPrecision(significand, exp, FormatPrecision);
3497 llvm::SmallVector<char, 256> buffer;
// Peel off decimal digits by repeated division by ten; digits therefore
// land in buffer least significant first.
3500 unsigned precision = significand.getBitWidth();
3501 APInt ten(precision, 10);
3502 APInt digit(precision, 0);
3504 bool inTrail = true;
3505 while (significand != 0) {
3506 // digit <- significand % 10
3507 // significand <- significand / 10
3508 APInt::udivrem(significand, ten, significand, digit);
3510 unsigned d = digit.getZExtValue();
3512 // Drop trailing zeros.
3513 if (inTrail && !d) exp++;
3515 buffer.push_back((char) ('0' + d));
3520 assert(!buffer.empty() && "no characters in buffer!");
3522 // Drop down to FormatPrecision.
3523 // TODO: don't do more precise calculations above than are required.
3524 AdjustToPrecision(buffer, exp, FormatPrecision);
3526 unsigned NDigits = buffer.size();
3528 // Check whether we should use scientific notation.
3529 bool FormatScientific;
3530 if (!FormatMaxPadding)
3531 FormatScientific = true;
3536 // But we shouldn't make the number look more precise than it is.
3537 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3538 NDigits + (unsigned) exp > FormatPrecision);
3540 // Power of the most significant digit.
3541 int MSD = exp + (int) (NDigits - 1);
3544 FormatScientific = false;
3546 // 765e-5 == 0.00765
3548 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3553 // Scientific formatting is pretty straightforward.
3554 if (FormatScientific) {
// Re-express exp relative to the leading digit, which is the last element
// of buffer.
3555 exp += (NDigits - 1);
3557 Str.push_back(buffer[NDigits-1]);
3562 for (unsigned I = 1; I != NDigits; ++I)
3563 Str.push_back(buffer[NDigits-1-I]);
3566 Str.push_back(exp >= 0 ? '+' : '-');
3567 if (exp < 0) exp = -exp;
// Exponent digits are collected low digit first, then emitted in reverse.
3568 SmallVector<char, 6> expbuf;
3570 expbuf.push_back((char) ('0' + (exp % 10)));
3573 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3574 Str.push_back(expbuf[E-1-I]);
3578 // Non-scientific, positive exponents.
3580 for (unsigned I = 0; I != NDigits; ++I)
3581 Str.push_back(buffer[NDigits-1-I]);
3582 for (unsigned I = 0; I != (unsigned) exp; ++I)
3587 // Non-scientific, negative exponents.
3589 // The number of digits to the left of the decimal point.
3590 int NWholeDigits = exp + (int) NDigits;
3593 if (NWholeDigits > 0) {
3594 for (; I != (unsigned) NWholeDigits; ++I)
3595 Str.push_back(buffer[NDigits-I-1]);
3598 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3602 for (unsigned Z = 1; Z != NZeros; ++Z)
// Remaining digits, continuing from index I, most significant first.
3606 for (; I != NDigits; ++I)
3607 Str.push_back(buffer[NDigits-I-1]);
// Checks whether this value has an exactly representable reciprocal. The
// visible checks are, in order: the semantics are one of IEEEhalf,
// IEEEsingle, IEEEdouble or IEEEquad; the category is fcNormal; the
// significand has only its integer bit set; dividing 1 by this value
// reports opOK; and the resulting reciprocal is not a denormal.
// NOTE(review): how the result is delivered through `inv` should be
// confirmed against the tail of the function.
3610 bool APFloat::getExactInverse(APFloat *inv) const {
3611 // We can only guarantee the existence of an exact inverse for IEEE floats.
3612 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3613 semantics != &IEEEdouble && semantics != &IEEEquad)
3616 // Special floats and denormals have no exact inverse.
3617 if (category != fcNormal)
3620 // Check that the number is a power of two by making sure that only the
3621 // integer bit is set in the significand.
3622 if (significandLSB() != semantics->precision - 1)
3626 APFloat reciprocal(*semantics, 1ULL);
3627 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3630 // Avoid multiplication with a denormal, it is not safe on all platforms and
3631 // may be slower than a normal division.
3632 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3635 assert(reciprocal.category == fcNormal &&
3636 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);