1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MathExtras.h"
27 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29 /* Assumed in hexadecimal significand parsing, and conversion to
30 hexadecimal strings. */
31 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
32 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
36 /* Represents floating point arithmetic semantics. */
38 /* The largest E such that 2^E is representable; this matches the
39 definition of IEEE 754. */
40 exponent_t maxExponent;
42 /* The smallest E such that 2^E is a normalized number; this
43 matches the definition of IEEE 754. */
44 exponent_t minExponent;
46 /* Number of bits in the significand. This includes the integer
48 unsigned int precision;
50 /* True if arithmetic is supported. */
51 unsigned int arithmeticOK;
54 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
55 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
56 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
57 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
58 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
59 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
61 /* The PowerPC format consists of two doubles. It does not map cleanly
62 onto the usual format above. It is approximated using twice the
63 mantissa bits. Note that for exponents near the double minimum,
64 we no longer can represent the full 106 mantissa bits, so those
65 will be treated as denormal numbers.
67 FIXME: While this approximation is equivalent to what GCC uses for
68 compile-time arithmetic on PPC double-double numbers, it is not able
69 to represent all possible values held by a PPC double-double number,
70 for example: (long double) 1.0 + (long double) 0x1p-106
71 Should this be replaced by a full emulation of PPC double-double? */
72 const fltSemantics APFloat::PPCDoubleDouble =
73 { 1023, -1022 + 53, 53 + 53, true };
75 /* A tight upper bound on number of parts required to hold the value
78 power * 815 / (351 * integerPartWidth) + 1
80 However, whilst the result may require only this many parts,
81 because we are multiplying two values to get it, the
82 multiplication may require an extra part with the excess part
83 being zero (consider the trivial case of 1 * 1, tcFullMultiply
84 requires two parts to hold the single-part result). So we add an
85 extra one to guarantee enough space whilst multiplying. */
86 const unsigned int maxExponent = 16383;
87 const unsigned int maxPrecision = 113;
88 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
89 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
90 / (351 * integerPartWidth));
93 /* A bunch of private, handy routines. */
95 static inline unsigned int
96 partCountForBits(unsigned int bits)
98 return ((bits) + integerPartWidth - 1) / integerPartWidth;
101 /* Returns 0U-9U. Return values >= 10U are not digits. */
102 static inline unsigned int
103 decDigitValue(unsigned int c)
109 hexDigitValue(unsigned int c)
129 assertArithmeticOK(const llvm::fltSemantics &semantics) {
130 assert(semantics.arithmeticOK &&
131 "Compile-time arithmetic does not support these semantics");
134 /* Return the value of a decimal exponent of the form
137 If the exponent overflows, returns a large exponent with the
140 readExponent(StringRef::iterator begin, StringRef::iterator end)
143 unsigned int absExponent;
144 const unsigned int overlargeExponent = 24000; /* FIXME. */
145 StringRef::iterator p = begin;
147 assert(p != end && "Exponent has no digits");
149 isNegative = (*p == '-');
150 if (*p == '-' || *p == '+') {
152 assert(p != end && "Exponent has no digits");
155 absExponent = decDigitValue(*p++);
156 assert(absExponent < 10U && "Invalid character in exponent");
158 for (; p != end; ++p) {
161 value = decDigitValue(*p);
162 assert(value < 10U && "Invalid character in exponent");
164 value += absExponent * 10;
165 if (absExponent >= overlargeExponent) {
166 absExponent = overlargeExponent;
167 p = end; /* outwit assert below */
173 assert(p == end && "Invalid exponent in exponent");
176 return -(int) absExponent;
178 return (int) absExponent;
181 /* This is ugly and needs cleaning up, but I don't immediately see
182 how whilst remaining safe. */
184 totalExponent(StringRef::iterator p, StringRef::iterator end,
185 int exponentAdjustment)
187 int unsignedExponent;
188 bool negative, overflow;
191 assert(p != end && "Exponent has no digits");
193 negative = *p == '-';
194 if (*p == '-' || *p == '+') {
196 assert(p != end && "Exponent has no digits");
199 unsignedExponent = 0;
201 for (; p != end; ++p) {
204 value = decDigitValue(*p);
205 assert(value < 10U && "Invalid character in exponent");
207 unsignedExponent = unsignedExponent * 10 + value;
208 if (unsignedExponent > 32767) {
214 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
218 exponent = unsignedExponent;
220 exponent = -exponent;
221 exponent += exponentAdjustment;
222 if (exponent > 32767 || exponent < -32768)
227 exponent = negative ? -32768: 32767;
232 static StringRef::iterator
233 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
234 StringRef::iterator *dot)
236 StringRef::iterator p = begin;
238 while (*p == '0' && p != end)
244 assert(end - begin != 1 && "Significand has no digits");
246 while (*p == '0' && p != end)
253 /* Given a normal decimal floating point number of the form
257 where the decimal point and exponent are optional, fill out the
258 structure D. Exponent is appropriate if the significand is
259 treated as an integer, and normalizedExponent if the significand
260 is taken to have the decimal point after a single leading
263 If the value is zero, V->firstSigDigit points to a non-digit, and
264 the return exponent is zero.
267 const char *firstSigDigit;
268 const char *lastSigDigit;
270 int normalizedExponent;
274 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
277 StringRef::iterator dot = end;
278 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
280 D->firstSigDigit = p;
282 D->normalizedExponent = 0;
284 for (; p != end; ++p) {
286 assert(dot == end && "String contains multiple dots");
291 if (decDigitValue(*p) >= 10U)
296 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
297 assert(p != begin && "Significand has no digits");
298 assert((dot == end || p - begin != 1) && "Significand has no digits");
300 /* p points to the first non-digit in the string */
301 D->exponent = readExponent(p + 1, end);
303 /* Implied decimal point? */
308 /* If number is all zeroes accept any exponent. */
309 if (p != D->firstSigDigit) {
310 /* Drop insignificant trailing zeroes. */
315 while (p != begin && *p == '0');
316 while (p != begin && *p == '.');
319 /* Adjust the exponents for any decimal point. */
320 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
321 D->normalizedExponent = (D->exponent +
322 static_cast<exponent_t>((p - D->firstSigDigit)
323 - (dot > D->firstSigDigit && dot < p)));
329 /* Return the trailing fraction of a hexadecimal number.
330 DIGITVALUE is the first hex digit of the fraction, P points to
333 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
334 unsigned int digitValue)
336 unsigned int hexDigit;
338 /* If the first trailing digit isn't 0 or 8 we can work out the
339 fraction immediately. */
341 return lfMoreThanHalf;
342 else if (digitValue < 8 && digitValue > 0)
343 return lfLessThanHalf;
345 /* Otherwise we need to find the first non-zero digit. */
349 assert(p != end && "Invalid trailing hexadecimal fraction!");
351 hexDigit = hexDigitValue(*p);
353 /* If we ran off the end it is exactly zero or one-half, otherwise
356 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
358 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
361 /* Return the fraction lost were a bignum truncated losing the least
362 significant BITS bits. */
364 lostFractionThroughTruncation(const integerPart *parts,
365 unsigned int partCount,
370 lsb = APInt::tcLSB(parts, partCount);
372 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
374 return lfExactlyZero;
376 return lfExactlyHalf;
377 if (bits <= partCount * integerPartWidth &&
378 APInt::tcExtractBit(parts, bits - 1))
379 return lfMoreThanHalf;
381 return lfLessThanHalf;
384 /* Shift DST right BITS bits noting lost fraction. */
386 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
388 lostFraction lost_fraction;
390 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
392 APInt::tcShiftRight(dst, parts, bits);
394 return lost_fraction;
397 /* Combine the effect of two lost fractions. */
399 combineLostFractions(lostFraction moreSignificant,
400 lostFraction lessSignificant)
402 if (lessSignificant != lfExactlyZero) {
403 if (moreSignificant == lfExactlyZero)
404 moreSignificant = lfLessThanHalf;
405 else if (moreSignificant == lfExactlyHalf)
406 moreSignificant = lfMoreThanHalf;
409 return moreSignificant;
412 /* The error from the true value, in half-ulps, on multiplying two
413 floating point numbers, which differ from the value they
414 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
415 than the returned value.
417 See "How to Read Floating Point Numbers Accurately" by William D
420 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
422 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
424 if (HUerr1 + HUerr2 == 0)
425 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
427 return inexactMultiply + 2 * (HUerr1 + HUerr2);
430 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
431 when the least significant BITS are truncated. BITS cannot be
434 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
436 unsigned int count, partBits;
437 integerPart part, boundary;
442 count = bits / integerPartWidth;
443 partBits = bits % integerPartWidth + 1;
445 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
448 boundary = (integerPart) 1 << (partBits - 1);
453 if (part - boundary <= boundary - part)
454 return part - boundary;
456 return boundary - part;
459 if (part == boundary) {
462 return ~(integerPart) 0; /* A lot. */
465 } else if (part == boundary - 1) {
468 return ~(integerPart) 0; /* A lot. */
473 return ~(integerPart) 0; /* A lot. */
476 /* Place pow(5, power) in DST, and return the number of parts used.
477 DST must be at least one part larger than size of the answer. */
479 powerOf5(integerPart *dst, unsigned int power)
481 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
483 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
484 pow5s[0] = 78125 * 5;
486 unsigned int partsCount[16] = { 1 };
487 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
489 assert(power <= maxExponent);
494 *p1 = firstEightPowers[power & 7];
500 for (unsigned int n = 0; power; power >>= 1, n++) {
505 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
507 pc = partsCount[n - 1];
508 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
510 if (pow5[pc - 1] == 0)
518 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
520 if (p2[result - 1] == 0)
523 /* Now result is in p1 with partsCount parts and p2 is scratch
525 tmp = p1, p1 = p2, p2 = tmp;
532 APInt::tcAssign(dst, p1, result);
537 /* Zero at the end to avoid modular arithmetic when adding one; used
538 when rounding up during hexadecimal output. */
539 static const char hexDigitsLower[] = "0123456789abcdef0";
540 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
541 static const char infinityL[] = "infinity";
542 static const char infinityU[] = "INFINITY";
543 static const char NaNL[] = "nan";
544 static const char NaNU[] = "NAN";
546 /* Write out an integerPart in hexadecimal, starting with the most
547 significant nibble. Write out exactly COUNT hexdigits, return
550 partAsHex (char *dst, integerPart part, unsigned int count,
551 const char *hexDigitChars)
553 unsigned int result = count;
555 assert(count != 0 && count <= integerPartWidth / 4);
557 part >>= (integerPartWidth - 4 * count);
559 dst[count] = hexDigitChars[part & 0xf];
566 /* Write out an unsigned decimal integer. */
568 writeUnsignedDecimal (char *dst, unsigned int n)
584 /* Write out a signed decimal integer. */
586 writeSignedDecimal (char *dst, int value)
590 dst = writeUnsignedDecimal(dst, -(unsigned) value);
592 dst = writeUnsignedDecimal(dst, value);
599 APFloat::initialize(const fltSemantics *ourSemantics)
603 semantics = ourSemantics;
606 significand.parts = new integerPart[count];
610 APFloat::freeSignificand()
613 delete [] significand.parts;
617 APFloat::assign(const APFloat &rhs)
619 assert(semantics == rhs.semantics);
622 category = rhs.category;
623 exponent = rhs.exponent;
624 if (category == fcNormal || category == fcNaN)
625 copySignificand(rhs);
629 APFloat::copySignificand(const APFloat &rhs)
631 assert(category == fcNormal || category == fcNaN);
632 assert(rhs.partCount() >= partCount());
634 APInt::tcAssign(significandParts(), rhs.significandParts(),
638 /* Make this number a NaN, with an arbitrary but deterministic value
639 for the significand. If double or longer, this is a signalling NaN,
640 which may not be ideal. If float, this is QNaN(0). */
641 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
646 integerPart *significand = significandParts();
647 unsigned numParts = partCount();
649 // Set the significand bits to the fill.
650 if (!fill || fill->getNumWords() < numParts)
651 APInt::tcSet(significand, 0, numParts);
653 APInt::tcAssign(significand, fill->getRawData(),
654 std::min(fill->getNumWords(), numParts));
656 // Zero out the excess bits of the significand.
657 unsigned bitsToPreserve = semantics->precision - 1;
658 unsigned part = bitsToPreserve / 64;
659 bitsToPreserve %= 64;
660 significand[part] &= ((1ULL << bitsToPreserve) - 1);
661 for (part++; part != numParts; ++part)
662 significand[part] = 0;
665 unsigned QNaNBit = semantics->precision - 2;
668 // We always have to clear the QNaN bit to make it an SNaN.
669 APInt::tcClearBit(significand, QNaNBit);
671 // If there are no bits set in the payload, we have to set
672 // *something* to make it a NaN instead of an infinity;
673 // conventionally, this is the next bit down from the QNaN bit.
674 if (APInt::tcIsZero(significand, numParts))
675 APInt::tcSetBit(significand, QNaNBit - 1);
677 // We always have to set the QNaN bit to make it a QNaN.
678 APInt::tcSetBit(significand, QNaNBit);
681 // For x87 extended precision, we want to make a NaN, not a
682 // pseudo-NaN. Maybe we should expose the ability to make
684 if (semantics == &APFloat::x87DoubleExtended)
685 APInt::tcSetBit(significand, QNaNBit + 1);
688 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
690 APFloat value(Sem, uninitialized);
691 value.makeNaN(SNaN, Negative, fill);
696 APFloat::operator=(const APFloat &rhs)
699 if (semantics != rhs.semantics) {
701 initialize(rhs.semantics);
710 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
713 if (semantics != rhs.semantics ||
714 category != rhs.category ||
717 if (category==fcZero || category==fcInfinity)
719 else if (category==fcNormal && exponent!=rhs.exponent)
723 const integerPart* p=significandParts();
724 const integerPart* q=rhs.significandParts();
725 for (; i>0; i--, p++, q++) {
733 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
734 assertArithmeticOK(ourSemantics);
735 initialize(&ourSemantics);
738 exponent = ourSemantics.precision - 1;
739 significandParts()[0] = value;
740 normalize(rmNearestTiesToEven, lfExactlyZero);
743 APFloat::APFloat(const fltSemantics &ourSemantics) {
744 assertArithmeticOK(ourSemantics);
745 initialize(&ourSemantics);
750 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
751 assertArithmeticOK(ourSemantics);
752 // Allocates storage if necessary but does not initialize it.
753 initialize(&ourSemantics);
756 APFloat::APFloat(const fltSemantics &ourSemantics,
757 fltCategory ourCategory, bool negative) {
758 assertArithmeticOK(ourSemantics);
759 initialize(&ourSemantics);
760 category = ourCategory;
762 if (category == fcNormal)
764 else if (ourCategory == fcNaN)
768 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
769 assertArithmeticOK(ourSemantics);
770 initialize(&ourSemantics);
771 convertFromString(text, rmNearestTiesToEven);
774 APFloat::APFloat(const APFloat &rhs) {
775 initialize(rhs.semantics);
784 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
785 void APFloat::Profile(FoldingSetNodeID& ID) const {
786 ID.Add(bitcastToAPInt());
790 APFloat::partCount() const
792 return partCountForBits(semantics->precision + 1);
796 APFloat::semanticsPrecision(const fltSemantics &semantics)
798 return semantics.precision;
802 APFloat::significandParts() const
804 return const_cast<APFloat *>(this)->significandParts();
808 APFloat::significandParts()
810 assert(category == fcNormal || category == fcNaN);
813 return significand.parts;
815 return &significand.part;
819 APFloat::zeroSignificand()
822 APInt::tcSet(significandParts(), 0, partCount());
825 /* Increment an fcNormal floating point number's significand. */
827 APFloat::incrementSignificand()
831 carry = APInt::tcIncrement(significandParts(), partCount());
833 /* Our callers should never cause us to overflow. */
838 /* Add the significand of the RHS. Returns the carry flag. */
840 APFloat::addSignificand(const APFloat &rhs)
844 parts = significandParts();
846 assert(semantics == rhs.semantics);
847 assert(exponent == rhs.exponent);
849 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
852 /* Subtract the significand of the RHS with a borrow flag. Returns
855 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
859 parts = significandParts();
861 assert(semantics == rhs.semantics);
862 assert(exponent == rhs.exponent);
864 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
868 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
869 on to the full-precision result of the multiplication. Returns the
872 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
874 unsigned int omsb; // One, not zero, based MSB.
875 unsigned int partsCount, newPartsCount, precision;
876 integerPart *lhsSignificand;
877 integerPart scratch[4];
878 integerPart *fullSignificand;
879 lostFraction lost_fraction;
882 assert(semantics == rhs.semantics);
884 precision = semantics->precision;
885 newPartsCount = partCountForBits(precision * 2);
887 if (newPartsCount > 4)
888 fullSignificand = new integerPart[newPartsCount];
890 fullSignificand = scratch;
892 lhsSignificand = significandParts();
893 partsCount = partCount();
895 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
896 rhs.significandParts(), partsCount, partsCount);
898 lost_fraction = lfExactlyZero;
899 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
900 exponent += rhs.exponent;
903 Significand savedSignificand = significand;
904 const fltSemantics *savedSemantics = semantics;
905 fltSemantics extendedSemantics;
907 unsigned int extendedPrecision;
909 /* Normalize our MSB. */
910 extendedPrecision = precision + precision - 1;
911 if (omsb != extendedPrecision) {
912 APInt::tcShiftLeft(fullSignificand, newPartsCount,
913 extendedPrecision - omsb);
914 exponent -= extendedPrecision - omsb;
917 /* Create new semantics. */
918 extendedSemantics = *semantics;
919 extendedSemantics.precision = extendedPrecision;
921 if (newPartsCount == 1)
922 significand.part = fullSignificand[0];
924 significand.parts = fullSignificand;
925 semantics = &extendedSemantics;
927 APFloat extendedAddend(*addend);
928 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
929 assert(status == opOK);
931 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
933 /* Restore our state. */
934 if (newPartsCount == 1)
935 fullSignificand[0] = significand.part;
936 significand = savedSignificand;
937 semantics = savedSemantics;
939 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
942 exponent -= (precision - 1);
944 if (omsb > precision) {
945 unsigned int bits, significantParts;
948 bits = omsb - precision;
949 significantParts = partCountForBits(omsb);
950 lf = shiftRight(fullSignificand, significantParts, bits);
951 lost_fraction = combineLostFractions(lf, lost_fraction);
955 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
957 if (newPartsCount > 4)
958 delete [] fullSignificand;
960 return lost_fraction;
963 /* Multiply the significands of LHS and RHS to DST. */
965 APFloat::divideSignificand(const APFloat &rhs)
967 unsigned int bit, i, partsCount;
968 const integerPart *rhsSignificand;
969 integerPart *lhsSignificand, *dividend, *divisor;
970 integerPart scratch[4];
971 lostFraction lost_fraction;
973 assert(semantics == rhs.semantics);
975 lhsSignificand = significandParts();
976 rhsSignificand = rhs.significandParts();
977 partsCount = partCount();
980 dividend = new integerPart[partsCount * 2];
984 divisor = dividend + partsCount;
986 /* Copy the dividend and divisor as they will be modified in-place. */
987 for (i = 0; i < partsCount; i++) {
988 dividend[i] = lhsSignificand[i];
989 divisor[i] = rhsSignificand[i];
990 lhsSignificand[i] = 0;
993 exponent -= rhs.exponent;
995 unsigned int precision = semantics->precision;
997 /* Normalize the divisor. */
998 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1001 APInt::tcShiftLeft(divisor, partsCount, bit);
1004 /* Normalize the dividend. */
1005 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1008 APInt::tcShiftLeft(dividend, partsCount, bit);
1011 /* Ensure the dividend >= divisor initially for the loop below.
1012 Incidentally, this means that the division loop below is
1013 guaranteed to set the integer bit to one. */
1014 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1016 APInt::tcShiftLeft(dividend, partsCount, 1);
1017 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1020 /* Long division. */
1021 for (bit = precision; bit; bit -= 1) {
1022 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1023 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1024 APInt::tcSetBit(lhsSignificand, bit - 1);
1027 APInt::tcShiftLeft(dividend, partsCount, 1);
1030 /* Figure out the lost fraction. */
1031 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1034 lost_fraction = lfMoreThanHalf;
1036 lost_fraction = lfExactlyHalf;
1037 else if (APInt::tcIsZero(dividend, partsCount))
1038 lost_fraction = lfExactlyZero;
1040 lost_fraction = lfLessThanHalf;
1045 return lost_fraction;
1049 APFloat::significandMSB() const
1051 return APInt::tcMSB(significandParts(), partCount());
1055 APFloat::significandLSB() const
1057 return APInt::tcLSB(significandParts(), partCount());
1060 /* Note that a zero result is NOT normalized to fcZero. */
1062 APFloat::shiftSignificandRight(unsigned int bits)
1064 /* Our exponent should not overflow. */
1065 assert((exponent_t) (exponent + bits) >= exponent);
1069 return shiftRight(significandParts(), partCount(), bits);
1072 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1074 APFloat::shiftSignificandLeft(unsigned int bits)
1076 assert(bits < semantics->precision);
1079 unsigned int partsCount = partCount();
1081 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1084 assert(!APInt::tcIsZero(significandParts(), partsCount));
1089 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1093 assert(semantics == rhs.semantics);
1094 assert(category == fcNormal);
1095 assert(rhs.category == fcNormal);
1097 compare = exponent - rhs.exponent;
1099 /* If exponents are equal, do an unsigned bignum comparison of the
1102 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1106 return cmpGreaterThan;
1107 else if (compare < 0)
1113 /* Handle overflow. Sign is preserved. We either become infinity or
1114 the largest finite number. */
1116 APFloat::handleOverflow(roundingMode rounding_mode)
1119 if (rounding_mode == rmNearestTiesToEven ||
1120 rounding_mode == rmNearestTiesToAway ||
1121 (rounding_mode == rmTowardPositive && !sign) ||
1122 (rounding_mode == rmTowardNegative && sign)) {
1123 category = fcInfinity;
1124 return (opStatus) (opOverflow | opInexact);
1127 /* Otherwise we become the largest finite number. */
1128 category = fcNormal;
1129 exponent = semantics->maxExponent;
1130 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1131 semantics->precision);
1136 /* Returns TRUE if, when truncating the current number, with BIT the
1137 new LSB, with the given lost fraction and rounding mode, the result
1138 would need to be rounded away from zero (i.e., by increasing the
1139 signficand). This routine must work for fcZero of both signs, and
1140 fcNormal numbers. */
1142 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1143 lostFraction lost_fraction,
1144 unsigned int bit) const
1146 /* NaNs and infinities should not have lost fractions. */
1147 assert(category == fcNormal || category == fcZero);
1149 /* Current callers never pass this so we don't handle it. */
1150 assert(lost_fraction != lfExactlyZero);
1152 switch (rounding_mode) {
1153 case rmNearestTiesToAway:
1154 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1156 case rmNearestTiesToEven:
1157 if (lost_fraction == lfMoreThanHalf)
1160 /* Our zeroes don't have a significand to test. */
1161 if (lost_fraction == lfExactlyHalf && category != fcZero)
1162 return APInt::tcExtractBit(significandParts(), bit);
1169 case rmTowardPositive:
1170 return sign == false;
1172 case rmTowardNegative:
1173 return sign == true;
1175 llvm_unreachable("Invalid rounding mode found");
1179 APFloat::normalize(roundingMode rounding_mode,
1180 lostFraction lost_fraction)
1182 unsigned int omsb; /* One, not zero, based MSB. */
1185 if (category != fcNormal)
1188 /* Before rounding normalize the exponent of fcNormal numbers. */
1189 omsb = significandMSB() + 1;
1192 /* OMSB is numbered from 1. We want to place it in the integer
1193 bit numbered PRECISION if possible, with a compensating change in
1195 exponentChange = omsb - semantics->precision;
1197 /* If the resulting exponent is too high, overflow according to
1198 the rounding mode. */
1199 if (exponent + exponentChange > semantics->maxExponent)
1200 return handleOverflow(rounding_mode);
1202 /* Subnormal numbers have exponent minExponent, and their MSB
1203 is forced based on that. */
1204 if (exponent + exponentChange < semantics->minExponent)
1205 exponentChange = semantics->minExponent - exponent;
1207 /* Shifting left is easy as we don't lose precision. */
1208 if (exponentChange < 0) {
1209 assert(lost_fraction == lfExactlyZero);
1211 shiftSignificandLeft(-exponentChange);
1216 if (exponentChange > 0) {
1219 /* Shift right and capture any new lost fraction. */
1220 lf = shiftSignificandRight(exponentChange);
1222 lost_fraction = combineLostFractions(lf, lost_fraction);
1224 /* Keep OMSB up-to-date. */
1225 if (omsb > (unsigned) exponentChange)
1226 omsb -= exponentChange;
1232 /* Now round the number according to rounding_mode given the lost
1235 /* As specified in IEEE 754, since we do not trap we do not report
1236 underflow for exact results. */
1237 if (lost_fraction == lfExactlyZero) {
1238 /* Canonicalize zeroes. */
1245 /* Increment the significand if we're rounding away from zero. */
1246 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1248 exponent = semantics->minExponent;
1250 incrementSignificand();
1251 omsb = significandMSB() + 1;
1253 /* Did the significand increment overflow? */
1254 if (omsb == (unsigned) semantics->precision + 1) {
1255 /* Renormalize by incrementing the exponent and shifting our
1256 significand right one. However if we already have the
1257 maximum exponent we overflow to infinity. */
1258 if (exponent == semantics->maxExponent) {
1259 category = fcInfinity;
1261 return (opStatus) (opOverflow | opInexact);
1264 shiftSignificandRight(1);
1270 /* The normal case - we were and are not denormal, and any
1271 significand increment above didn't overflow. */
1272 if (omsb == semantics->precision)
1275 /* We have a non-zero denormal. */
1276 assert(omsb < semantics->precision);
1278 /* Canonicalize zeroes. */
1282 /* The fcZero case is a denormal that underflowed to zero. */
1283 return (opStatus) (opUnderflow | opInexact);
1287 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1289 switch (convolve(category, rhs.category)) {
1291 llvm_unreachable(0);
1293 case convolve(fcNaN, fcZero):
1294 case convolve(fcNaN, fcNormal):
1295 case convolve(fcNaN, fcInfinity):
1296 case convolve(fcNaN, fcNaN):
1297 case convolve(fcNormal, fcZero):
1298 case convolve(fcInfinity, fcNormal):
1299 case convolve(fcInfinity, fcZero):
1302 case convolve(fcZero, fcNaN):
1303 case convolve(fcNormal, fcNaN):
1304 case convolve(fcInfinity, fcNaN):
1306 copySignificand(rhs);
1309 case convolve(fcNormal, fcInfinity):
1310 case convolve(fcZero, fcInfinity):
1311 category = fcInfinity;
1312 sign = rhs.sign ^ subtract;
1315 case convolve(fcZero, fcNormal):
1317 sign = rhs.sign ^ subtract;
1320 case convolve(fcZero, fcZero):
1321 /* Sign depends on rounding mode; handled by caller. */
1324 case convolve(fcInfinity, fcInfinity):
1325 /* Differently signed infinities can only be validly
1327 if (((sign ^ rhs.sign)!=0) != subtract) {
1334 case convolve(fcNormal, fcNormal):
1339 /* Add or subtract two normal numbers. */
1341 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1344 lostFraction lost_fraction;
1347 /* Determine if the operation on the absolute values is effectively
1348 an addition or subtraction. */
1349 subtract ^= (sign ^ rhs.sign) ? true : false;
1351 /* Are we bigger exponent-wise than the RHS? */
1352 bits = exponent - rhs.exponent;
1354 /* Subtraction is more subtle than one might naively expect. */
1356 APFloat temp_rhs(rhs);
1360 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1361 lost_fraction = lfExactlyZero;
1362 } else if (bits > 0) {
1363 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1364 shiftSignificandLeft(1);
1367 lost_fraction = shiftSignificandRight(-bits - 1);
1368 temp_rhs.shiftSignificandLeft(1);
1373 carry = temp_rhs.subtractSignificand
1374 (*this, lost_fraction != lfExactlyZero);
1375 copySignificand(temp_rhs);
1378 carry = subtractSignificand
1379 (temp_rhs, lost_fraction != lfExactlyZero);
1382 /* Invert the lost fraction - it was on the RHS and
1384 if (lost_fraction == lfLessThanHalf)
1385 lost_fraction = lfMoreThanHalf;
1386 else if (lost_fraction == lfMoreThanHalf)
1387 lost_fraction = lfLessThanHalf;
1389 /* The code above is intended to ensure that no borrow is
1395 APFloat temp_rhs(rhs);
1397 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1398 carry = addSignificand(temp_rhs);
1400 lost_fraction = shiftSignificandRight(-bits);
1401 carry = addSignificand(rhs);
1404 /* We have a guard bit; generating a carry cannot happen. */
1409 return lost_fraction;
1413 APFloat::multiplySpecials(const APFloat &rhs)
1415 switch (convolve(category, rhs.category)) {
1417 llvm_unreachable(0);
1419 case convolve(fcNaN, fcZero):
1420 case convolve(fcNaN, fcNormal):
1421 case convolve(fcNaN, fcInfinity):
1422 case convolve(fcNaN, fcNaN):
1425 case convolve(fcZero, fcNaN):
1426 case convolve(fcNormal, fcNaN):
1427 case convolve(fcInfinity, fcNaN):
1429 copySignificand(rhs);
1432 case convolve(fcNormal, fcInfinity):
1433 case convolve(fcInfinity, fcNormal):
1434 case convolve(fcInfinity, fcInfinity):
1435 category = fcInfinity;
1438 case convolve(fcZero, fcNormal):
1439 case convolve(fcNormal, fcZero):
1440 case convolve(fcZero, fcZero):
1444 case convolve(fcZero, fcInfinity):
1445 case convolve(fcInfinity, fcZero):
1449 case convolve(fcNormal, fcNormal):
1455 APFloat::divideSpecials(const APFloat &rhs)
1457 switch (convolve(category, rhs.category)) {
1459 llvm_unreachable(0);
1461 case convolve(fcNaN, fcZero):
1462 case convolve(fcNaN, fcNormal):
1463 case convolve(fcNaN, fcInfinity):
1464 case convolve(fcNaN, fcNaN):
1465 case convolve(fcInfinity, fcZero):
1466 case convolve(fcInfinity, fcNormal):
1467 case convolve(fcZero, fcInfinity):
1468 case convolve(fcZero, fcNormal):
1471 case convolve(fcZero, fcNaN):
1472 case convolve(fcNormal, fcNaN):
1473 case convolve(fcInfinity, fcNaN):
1475 copySignificand(rhs);
1478 case convolve(fcNormal, fcInfinity):
1482 case convolve(fcNormal, fcZero):
1483 category = fcInfinity;
1486 case convolve(fcInfinity, fcInfinity):
1487 case convolve(fcZero, fcZero):
1491 case convolve(fcNormal, fcNormal):
1497 APFloat::modSpecials(const APFloat &rhs)
1499 switch (convolve(category, rhs.category)) {
1501 llvm_unreachable(0);
1503 case convolve(fcNaN, fcZero):
1504 case convolve(fcNaN, fcNormal):
1505 case convolve(fcNaN, fcInfinity):
1506 case convolve(fcNaN, fcNaN):
1507 case convolve(fcZero, fcInfinity):
1508 case convolve(fcZero, fcNormal):
1509 case convolve(fcNormal, fcInfinity):
1512 case convolve(fcZero, fcNaN):
1513 case convolve(fcNormal, fcNaN):
1514 case convolve(fcInfinity, fcNaN):
1516 copySignificand(rhs);
1519 case convolve(fcNormal, fcZero):
1520 case convolve(fcInfinity, fcZero):
1521 case convolve(fcInfinity, fcNormal):
1522 case convolve(fcInfinity, fcInfinity):
1523 case convolve(fcZero, fcZero):
1527 case convolve(fcNormal, fcNormal):
1534 APFloat::changeSign()
1536 /* Look mummy, this one's easy. */
1541 APFloat::clearSign()
1543 /* So is this one. */
1548 APFloat::copySign(const APFloat &rhs)
1554 /* Normalized addition or subtraction. */
1556 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1561 assertArithmeticOK(*semantics);
1563 fs = addOrSubtractSpecials(rhs, subtract);
1565 /* This return code means it was not a simple case. */
1566 if (fs == opDivByZero) {
1567 lostFraction lost_fraction;
1569 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1570 fs = normalize(rounding_mode, lost_fraction);
1572 /* Can only be zero if we lost no fraction. */
1573 assert(category != fcZero || lost_fraction == lfExactlyZero);
1576 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1577 positive zero unless rounding to minus infinity, except that
1578 adding two like-signed zeroes gives that zero. */
1579 if (category == fcZero) {
1580 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1581 sign = (rounding_mode == rmTowardNegative);
1587 /* Normalized addition. */
1589 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1591 return addOrSubtract(rhs, rounding_mode, false);
1594 /* Normalized subtraction. */
1596 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1598 return addOrSubtract(rhs, rounding_mode, true);
1601 /* Normalized multiply. */
1603 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1607 assertArithmeticOK(*semantics);
1609 fs = multiplySpecials(rhs);
1611 if (category == fcNormal) {
1612 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1613 fs = normalize(rounding_mode, lost_fraction);
1614 if (lost_fraction != lfExactlyZero)
1615 fs = (opStatus) (fs | opInexact);
1621 /* Normalized divide. */
1623 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1627 assertArithmeticOK(*semantics);
1629 fs = divideSpecials(rhs);
1631 if (category == fcNormal) {
1632 lostFraction lost_fraction = divideSignificand(rhs);
1633 fs = normalize(rounding_mode, lost_fraction);
1634 if (lost_fraction != lfExactlyZero)
1635 fs = (opStatus) (fs | opInexact);
1641 /* Normalized remainder. This is not currently correct in all cases. */
1643 APFloat::remainder(const APFloat &rhs)
1647 unsigned int origSign = sign;
1649 assertArithmeticOK(*semantics);
1650 fs = V.divide(rhs, rmNearestTiesToEven);
1651 if (fs == opDivByZero)
1654 int parts = partCount();
1655 integerPart *x = new integerPart[parts];
1657 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1658 rmNearestTiesToEven, &ignored);
1659 if (fs==opInvalidOp)
1662 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1663 rmNearestTiesToEven);
1664 assert(fs==opOK); // should always work
1666 fs = V.multiply(rhs, rmNearestTiesToEven);
1667 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1669 fs = subtract(V, rmNearestTiesToEven);
1670 assert(fs==opOK || fs==opInexact); // likewise
1673 sign = origSign; // IEEE754 requires this
1678 /* Normalized llvm frem (C fmod).
1679 This is not currently correct in all cases. */
1681 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1684 assertArithmeticOK(*semantics);
1685 fs = modSpecials(rhs);
1687 if (category == fcNormal && rhs.category == fcNormal) {
1689 unsigned int origSign = sign;
1691 fs = V.divide(rhs, rmNearestTiesToEven);
1692 if (fs == opDivByZero)
1695 int parts = partCount();
1696 integerPart *x = new integerPart[parts];
1698 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1699 rmTowardZero, &ignored);
1700 if (fs==opInvalidOp)
1703 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1704 rmNearestTiesToEven);
1705 assert(fs==opOK); // should always work
1707 fs = V.multiply(rhs, rounding_mode);
1708 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1710 fs = subtract(V, rounding_mode);
1711 assert(fs==opOK || fs==opInexact); // likewise
1714 sign = origSign; // IEEE754 requires this
1720 /* Normalized fused-multiply-add. */
1722 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1723 const APFloat &addend,
1724 roundingMode rounding_mode)
1728 assertArithmeticOK(*semantics);
1730 /* Post-multiplication sign, before addition. */
1731 sign ^= multiplicand.sign;
1733 /* If and only if all arguments are normal do we need to do an
1734 extended-precision calculation. */
1735 if (category == fcNormal &&
1736 multiplicand.category == fcNormal &&
1737 addend.category == fcNormal) {
1738 lostFraction lost_fraction;
1740 lost_fraction = multiplySignificand(multiplicand, &addend);
1741 fs = normalize(rounding_mode, lost_fraction);
1742 if (lost_fraction != lfExactlyZero)
1743 fs = (opStatus) (fs | opInexact);
1745 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1746 positive zero unless rounding to minus infinity, except that
1747 adding two like-signed zeroes gives that zero. */
1748 if (category == fcZero && sign != addend.sign)
1749 sign = (rounding_mode == rmTowardNegative);
1751 fs = multiplySpecials(multiplicand);
1753 /* FS can only be opOK or opInvalidOp. There is no more work
1754 to do in the latter case. The IEEE-754R standard says it is
1755 implementation-defined in this case whether, if ADDEND is a
1756 quiet NaN, we raise invalid op; this implementation does so.
1758 If we need to do the addition we can do so with normal
1761 fs = addOrSubtract(addend, rounding_mode, false);
1767 /* Rounding-mode corrrect round to integral value. */
1768 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1770 assertArithmeticOK(*semantics);
1772 // If the exponent is large enough, we know that this value is already
1773 // integral, and the arithmetic below would potentially cause it to saturate
1774 // to +/-Inf. Bail out early instead.
1775 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1778 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1779 // precision of our format, and then subtract it back off again. The choice
1780 // of rounding modes for the addition/subtraction determines the rounding mode
1781 // for our integral rounding as well.
1782 // NOTE: When the input value is negative, we do subtraction followed by
1783 // addition instead.
1784 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1785 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1786 APFloat MagicConstant(*semantics);
1787 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1788 rmNearestTiesToEven);
1789 MagicConstant.copySign(*this);
1794 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1795 bool inputSign = isNegative();
1797 fs = add(MagicConstant, rounding_mode);
1798 if (fs != opOK && fs != opInexact)
1801 fs = subtract(MagicConstant, rounding_mode);
1803 // Restore the input sign.
1804 if (inputSign != isNegative())
1811 /* Comparison requires normalized numbers. */
1813 APFloat::compare(const APFloat &rhs) const
1817 assertArithmeticOK(*semantics);
1818 assert(semantics == rhs.semantics);
1820 switch (convolve(category, rhs.category)) {
1822 llvm_unreachable(0);
1824 case convolve(fcNaN, fcZero):
1825 case convolve(fcNaN, fcNormal):
1826 case convolve(fcNaN, fcInfinity):
1827 case convolve(fcNaN, fcNaN):
1828 case convolve(fcZero, fcNaN):
1829 case convolve(fcNormal, fcNaN):
1830 case convolve(fcInfinity, fcNaN):
1831 return cmpUnordered;
1833 case convolve(fcInfinity, fcNormal):
1834 case convolve(fcInfinity, fcZero):
1835 case convolve(fcNormal, fcZero):
1839 return cmpGreaterThan;
1841 case convolve(fcNormal, fcInfinity):
1842 case convolve(fcZero, fcInfinity):
1843 case convolve(fcZero, fcNormal):
1845 return cmpGreaterThan;
1849 case convolve(fcInfinity, fcInfinity):
1850 if (sign == rhs.sign)
1855 return cmpGreaterThan;
1857 case convolve(fcZero, fcZero):
1860 case convolve(fcNormal, fcNormal):
1864 /* Two normal numbers. Do they have the same sign? */
1865 if (sign != rhs.sign) {
1867 result = cmpLessThan;
1869 result = cmpGreaterThan;
1871 /* Compare absolute values; invert result if negative. */
1872 result = compareAbsoluteValue(rhs);
1875 if (result == cmpLessThan)
1876 result = cmpGreaterThan;
1877 else if (result == cmpGreaterThan)
1878 result = cmpLessThan;
1885 /// APFloat::convert - convert a value of one floating point type to another.
1886 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1887 /// records whether the transformation lost information, i.e. whether
1888 /// converting the result back to the original type will produce the
1889 /// original value (this is almost the same as return value==fsOK, but there
1890 /// are edge cases where this is not so).
1893 APFloat::convert(const fltSemantics &toSemantics,
1894 roundingMode rounding_mode, bool *losesInfo)
1896 lostFraction lostFraction;
1897 unsigned int newPartCount, oldPartCount;
1900 const fltSemantics &fromSemantics = *semantics;
1902 assertArithmeticOK(fromSemantics);
1903 assertArithmeticOK(toSemantics);
1904 lostFraction = lfExactlyZero;
1905 newPartCount = partCountForBits(toSemantics.precision + 1);
1906 oldPartCount = partCount();
1907 shift = toSemantics.precision - fromSemantics.precision;
1909 bool X86SpecialNan = false;
1910 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1911 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1912 (!(*significandParts() & 0x8000000000000000ULL) ||
1913 !(*significandParts() & 0x4000000000000000ULL))) {
1914 // x86 has some unusual NaNs which cannot be represented in any other
1915 // format; note them here.
1916 X86SpecialNan = true;
1919 // If this is a truncation, perform the shift before we narrow the storage.
1920 if (shift < 0 && (category==fcNormal || category==fcNaN))
1921 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1923 // Fix the storage so it can hold to new value.
1924 if (newPartCount > oldPartCount) {
1925 // The new type requires more storage; make it available.
1926 integerPart *newParts;
1927 newParts = new integerPart[newPartCount];
1928 APInt::tcSet(newParts, 0, newPartCount);
1929 if (category==fcNormal || category==fcNaN)
1930 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1932 significand.parts = newParts;
1933 } else if (newPartCount == 1 && oldPartCount != 1) {
1934 // Switch to built-in storage for a single part.
1935 integerPart newPart = 0;
1936 if (category==fcNormal || category==fcNaN)
1937 newPart = significandParts()[0];
1939 significand.part = newPart;
1942 // Now that we have the right storage, switch the semantics.
1943 semantics = &toSemantics;
1945 // If this is an extension, perform the shift now that the storage is
1947 if (shift > 0 && (category==fcNormal || category==fcNaN))
1948 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1950 if (category == fcNormal) {
1951 fs = normalize(rounding_mode, lostFraction);
1952 *losesInfo = (fs != opOK);
1953 } else if (category == fcNaN) {
1954 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1955 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1956 // does not give you back the same bits. This is dubious, and we
1957 // don't currently do it. You're really supposed to get
1958 // an invalid operation signal at runtime, but nobody does that.
1968 /* Convert a floating point number to an integer according to the
1969 rounding mode. If the rounded integer value is out of range this
1970 returns an invalid operation exception and the contents of the
1971 destination parts are unspecified. If the rounded value is in
1972 range but the floating point number is not the exact integer, the C
1973 standard doesn't require an inexact exception to be raised. IEEE
1974 854 does require it so we do that.
1976 Note that for conversions to integer type the C standard requires
1977 round-to-zero to always be used. */
1979 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1981 roundingMode rounding_mode,
1982 bool *isExact) const
1984 lostFraction lost_fraction;
1985 const integerPart *src;
1986 unsigned int dstPartsCount, truncatedBits;
1988 assertArithmeticOK(*semantics);
1992 /* Handle the three special cases first. */
1993 if (category == fcInfinity || category == fcNaN)
1996 dstPartsCount = partCountForBits(width);
1998 if (category == fcZero) {
1999 APInt::tcSet(parts, 0, dstPartsCount);
2000 // Negative zero can't be represented as an int.
2005 src = significandParts();
2007 /* Step 1: place our absolute value, with any fraction truncated, in
2010 /* Our absolute value is less than one; truncate everything. */
2011 APInt::tcSet(parts, 0, dstPartsCount);
2012 /* For exponent -1 the integer bit represents .5, look at that.
2013 For smaller exponents leftmost truncated bit is 0. */
2014 truncatedBits = semantics->precision -1U - exponent;
2016 /* We want the most significant (exponent + 1) bits; the rest are
2018 unsigned int bits = exponent + 1U;
2020 /* Hopelessly large in magnitude? */
2024 if (bits < semantics->precision) {
2025 /* We truncate (semantics->precision - bits) bits. */
2026 truncatedBits = semantics->precision - bits;
2027 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2029 /* We want at least as many bits as are available. */
2030 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2031 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2036 /* Step 2: work out any lost fraction, and increment the absolute
2037 value if we would round away from zero. */
2038 if (truncatedBits) {
2039 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2041 if (lost_fraction != lfExactlyZero &&
2042 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2043 if (APInt::tcIncrement(parts, dstPartsCount))
2044 return opInvalidOp; /* Overflow. */
2047 lost_fraction = lfExactlyZero;
2050 /* Step 3: check if we fit in the destination. */
2051 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2055 /* Negative numbers cannot be represented as unsigned. */
2059 /* It takes omsb bits to represent the unsigned integer value.
2060 We lose a bit for the sign, but care is needed as the
2061 maximally negative integer is a special case. */
2062 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2065 /* This case can happen because of rounding. */
2070 APInt::tcNegate (parts, dstPartsCount);
2072 if (omsb >= width + !isSigned)
2076 if (lost_fraction == lfExactlyZero) {
2083 /* Same as convertToSignExtendedInteger, except we provide
2084 deterministic values in case of an invalid operation exception,
2085 namely zero for NaNs and the minimal or maximal value respectively
2086 for underflow or overflow.
2087 The *isExact output tells whether the result is exact, in the sense
2088 that converting it back to the original floating point type produces
2089 the original value. This is almost equivalent to result==opOK,
2090 except for negative zeroes.
2093 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2095 roundingMode rounding_mode, bool *isExact) const
2099 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2102 if (fs == opInvalidOp) {
2103 unsigned int bits, dstPartsCount;
2105 dstPartsCount = partCountForBits(width);
2107 if (category == fcNaN)
2112 bits = width - isSigned;
2114 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2115 if (sign && isSigned)
2116 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2122 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2123 an APSInt, whose initial bit-width and signed-ness are used to determine the
2124 precision of the conversion.
2127 APFloat::convertToInteger(APSInt &result,
2128 roundingMode rounding_mode, bool *isExact) const
2130 unsigned bitWidth = result.getBitWidth();
2131 SmallVector<uint64_t, 4> parts(result.getNumWords());
2132 opStatus status = convertToInteger(
2133 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2134 // Keeps the original signed-ness.
2135 result = APInt(bitWidth, parts);
2139 /* Convert an unsigned integer SRC to a floating point number,
2140 rounding according to ROUNDING_MODE. The sign of the floating
2141 point number is not modified. */
2143 APFloat::convertFromUnsignedParts(const integerPart *src,
2144 unsigned int srcCount,
2145 roundingMode rounding_mode)
2147 unsigned int omsb, precision, dstCount;
2149 lostFraction lost_fraction;
2151 assertArithmeticOK(*semantics);
2152 category = fcNormal;
2153 omsb = APInt::tcMSB(src, srcCount) + 1;
2154 dst = significandParts();
2155 dstCount = partCount();
2156 precision = semantics->precision;
2158 /* We want the most significant PRECISION bits of SRC. There may not
2159 be that many; extract what we can. */
2160 if (precision <= omsb) {
2161 exponent = omsb - 1;
2162 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2164 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2166 exponent = precision - 1;
2167 lost_fraction = lfExactlyZero;
2168 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2171 return normalize(rounding_mode, lost_fraction);
2175 APFloat::convertFromAPInt(const APInt &Val,
2177 roundingMode rounding_mode)
2179 unsigned int partCount = Val.getNumWords();
2183 if (isSigned && api.isNegative()) {
2188 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2191 /* Convert a two's complement integer SRC to a floating point number,
2192 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2193 integer is signed, in which case it must be sign-extended. */
2195 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2196 unsigned int srcCount,
2198 roundingMode rounding_mode)
2202 assertArithmeticOK(*semantics);
2204 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2207 /* If we're signed and negative negate a copy. */
2209 copy = new integerPart[srcCount];
2210 APInt::tcAssign(copy, src, srcCount);
2211 APInt::tcNegate(copy, srcCount);
2212 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2216 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2222 /* FIXME: should this just take a const APInt reference? */
2224 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2225 unsigned int width, bool isSigned,
2226 roundingMode rounding_mode)
2228 unsigned int partCount = partCountForBits(width);
2229 APInt api = APInt(width, makeArrayRef(parts, partCount));
2232 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2237 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2241 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2243 lostFraction lost_fraction = lfExactlyZero;
2244 integerPart *significand;
2245 unsigned int bitPos, partsCount;
2246 StringRef::iterator dot, firstSignificantDigit;
2250 category = fcNormal;
2252 significand = significandParts();
2253 partsCount = partCount();
2254 bitPos = partsCount * integerPartWidth;
2256 /* Skip leading zeroes and any (hexa)decimal point. */
2257 StringRef::iterator begin = s.begin();
2258 StringRef::iterator end = s.end();
2259 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2260 firstSignificantDigit = p;
2263 integerPart hex_value;
2266 assert(dot == end && "String contains multiple dots");
2273 hex_value = hexDigitValue(*p);
2274 if (hex_value == -1U) {
2283 /* Store the number whilst 4-bit nibbles remain. */
2286 hex_value <<= bitPos % integerPartWidth;
2287 significand[bitPos / integerPartWidth] |= hex_value;
2289 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2290 while (p != end && hexDigitValue(*p) != -1U)
2297 /* Hex floats require an exponent but not a hexadecimal point. */
2298 assert(p != end && "Hex strings require an exponent");
2299 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2300 assert(p != begin && "Significand has no digits");
2301 assert((dot == end || p - begin != 1) && "Significand has no digits");
2303 /* Ignore the exponent if we are zero. */
2304 if (p != firstSignificantDigit) {
2307 /* Implicit hexadecimal point? */
2311 /* Calculate the exponent adjustment implicit in the number of
2312 significant digits. */
2313 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2314 if (expAdjustment < 0)
2316 expAdjustment = expAdjustment * 4 - 1;
2318 /* Adjust for writing the significand starting at the most
2319 significant nibble. */
2320 expAdjustment += semantics->precision;
2321 expAdjustment -= partsCount * integerPartWidth;
2323 /* Adjust for the given exponent. */
2324 exponent = totalExponent(p + 1, end, expAdjustment);
2327 return normalize(rounding_mode, lost_fraction);
2331 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2332 unsigned sigPartCount, int exp,
2333 roundingMode rounding_mode)
2335 unsigned int parts, pow5PartCount;
2336 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2337 integerPart pow5Parts[maxPowerOfFiveParts];
2340 isNearest = (rounding_mode == rmNearestTiesToEven ||
2341 rounding_mode == rmNearestTiesToAway);
2343 parts = partCountForBits(semantics->precision + 11);
2345 /* Calculate pow(5, abs(exp)). */
2346 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2348 for (;; parts *= 2) {
2349 opStatus sigStatus, powStatus;
2350 unsigned int excessPrecision, truncatedBits;
2352 calcSemantics.precision = parts * integerPartWidth - 1;
2353 excessPrecision = calcSemantics.precision - semantics->precision;
2354 truncatedBits = excessPrecision;
2356 APFloat decSig(calcSemantics, fcZero, sign);
2357 APFloat pow5(calcSemantics, fcZero, false);
2359 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2360 rmNearestTiesToEven);
2361 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2362 rmNearestTiesToEven);
2363 /* Add exp, as 10^n = 5^n * 2^n. */
2364 decSig.exponent += exp;
2366 lostFraction calcLostFraction;
2367 integerPart HUerr, HUdistance;
2368 unsigned int powHUerr;
2371 /* multiplySignificand leaves the precision-th bit set to 1. */
2372 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2373 powHUerr = powStatus != opOK;
2375 calcLostFraction = decSig.divideSignificand(pow5);
2376 /* Denormal numbers have less precision. */
2377 if (decSig.exponent < semantics->minExponent) {
2378 excessPrecision += (semantics->minExponent - decSig.exponent);
2379 truncatedBits = excessPrecision;
2380 if (excessPrecision > calcSemantics.precision)
2381 excessPrecision = calcSemantics.precision;
2383 /* Extra half-ulp lost in reciprocal of exponent. */
2384 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2387 /* Both multiplySignificand and divideSignificand return the
2388 result with the integer bit set. */
2389 assert(APInt::tcExtractBit
2390 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2392 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2394 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2395 excessPrecision, isNearest);
2397 /* Are we guaranteed to round correctly if we truncate? */
2398 if (HUdistance >= HUerr) {
2399 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2400 calcSemantics.precision - excessPrecision,
2402 /* Take the exponent of decSig. If we tcExtract-ed less bits
2403 above we must adjust our exponent to compensate for the
2404 implicit right shift. */
2405 exponent = (decSig.exponent + semantics->precision
2406 - (calcSemantics.precision - excessPrecision));
2407 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2410 return normalize(rounding_mode, calcLostFraction);
2416 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2421 /* Scan the text. */
2422 StringRef::iterator p = str.begin();
2423 interpretDecimal(p, str.end(), &D);
2425 /* Handle the quick cases. First the case of no significant digits,
2426 i.e. zero, and then exponents that are obviously too large or too
2427 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2428 definitely overflows if
2430 (exp - 1) * L >= maxExponent
2432 and definitely underflows to zero where
2434 (exp + 1) * L <= minExponent - precision
2436 With integer arithmetic the tightest bounds for L are
2438 93/28 < L < 196/59 [ numerator <= 256 ]
2439 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2442 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2446 /* Check whether the normalized exponent is high enough to overflow
2447 max during the log-rebasing in the max-exponent check below. */
2448 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2449 fs = handleOverflow(rounding_mode);
2451 /* If it wasn't, then it also wasn't high enough to overflow max
2452 during the log-rebasing in the min-exponent check. Check that it
2453 won't overflow min in either check, then perform the min-exponent
2455 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2456 (D.normalizedExponent + 1) * 28738 <=
2457 8651 * (semantics->minExponent - (int) semantics->precision)) {
2458 /* Underflow to zero and round. */
2460 fs = normalize(rounding_mode, lfLessThanHalf);
2462 /* We can finally safely perform the max-exponent check. */
2463 } else if ((D.normalizedExponent - 1) * 42039
2464 >= 12655 * semantics->maxExponent) {
2465 /* Overflow and round. */
2466 fs = handleOverflow(rounding_mode);
2468 integerPart *decSignificand;
2469 unsigned int partCount;
2471 /* A tight upper bound on number of bits required to hold an
2472 N-digit decimal integer is N * 196 / 59. Allocate enough space
2473 to hold the full significand, and an extra part required by
2475 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2476 partCount = partCountForBits(1 + 196 * partCount / 59);
2477 decSignificand = new integerPart[partCount + 1];
2480 /* Convert to binary efficiently - we do almost all multiplication
2481 in an integerPart. When this would overflow do we do a single
2482 bignum multiplication, and then revert again to multiplication
2483 in an integerPart. */
2485 integerPart decValue, val, multiplier;
2493 if (p == str.end()) {
2497 decValue = decDigitValue(*p++);
2498 assert(decValue < 10U && "Invalid character in significand");
2500 val = val * 10 + decValue;
2501 /* The maximum number that can be multiplied by ten with any
2502 digit added without overflowing an integerPart. */
2503 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2505 /* Multiply out the current part. */
2506 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2507 partCount, partCount + 1, false);
2509 /* If we used another part (likely but not guaranteed), increase
2511 if (decSignificand[partCount])
2513 } while (p <= D.lastSigDigit);
2515 category = fcNormal;
2516 fs = roundSignificandWithExponent(decSignificand, partCount,
2517 D.exponent, rounding_mode);
2519 delete [] decSignificand;
2526 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2528 assertArithmeticOK(*semantics);
2529 assert(!str.empty() && "Invalid string length");
2531 /* Handle a leading minus sign. */
2532 StringRef::iterator p = str.begin();
2533 size_t slen = str.size();
2534 sign = *p == '-' ? 1 : 0;
2535 if (*p == '-' || *p == '+') {
2538 assert(slen && "String has no digits");
2541 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2542 assert(slen - 2 && "Invalid string");
2543 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2547 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2550 /* Write out a hexadecimal representation of the floating point value
2551 to DST, which must be of sufficient size, in the C99 form
2552 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2553 excluding the terminating NUL.
2555 If UPPERCASE, the output is in upper case, otherwise in lower case.
2557 HEXDIGITS digits appear altogether, rounding the value if
2558 necessary. If HEXDIGITS is 0, the minimal precision to display the
2559 number precisely is used instead. If nothing would appear after
2560 the decimal point it is suppressed.
2562 The decimal exponent is always printed and has at least one digit.
2563 Zero values display an exponent of zero. Infinities and NaNs
2564 appear as "infinity" or "nan" respectively.
2566 The above rules are as specified by C99. There is ambiguity about
2567 what the leading hexadecimal digit should be. This implementation
2568 uses whatever is necessary so that the exponent is displayed as
2569 stored. This implies the exponent will fall within the IEEE format
2570 range, and the leading hexadecimal digit will be 0 (for denormals),
2571 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2572 any other digits zero).
2575 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2576 bool upperCase, roundingMode rounding_mode) const
2580 assertArithmeticOK(*semantics);
2588 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2589 dst += sizeof infinityL - 1;
2593 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2594 dst += sizeof NaNU - 1;
2599 *dst++ = upperCase ? 'X': 'x';
2601 if (hexDigits > 1) {
2603 memset (dst, '0', hexDigits - 1);
2604 dst += hexDigits - 1;
2606 *dst++ = upperCase ? 'P': 'p';
2611 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2617 return static_cast<unsigned int>(dst - p);
2620 /* Does the hard work of outputting the correctly rounded hexadecimal
2621 form of a normal floating point number with the specified number of
2622 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2623 digits necessary to print the value precisely is output. */
2625 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2627 roundingMode rounding_mode) const
2629 unsigned int count, valueBits, shift, partsCount, outputDigits;
2630 const char *hexDigitChars;
2631 const integerPart *significand;
2636 *dst++ = upperCase ? 'X': 'x';
2639 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2641 significand = significandParts();
2642 partsCount = partCount();
2644 /* +3 because the first digit only uses the single integer bit, so
2645 we have 3 virtual zero most-significant-bits. */
2646 valueBits = semantics->precision + 3;
2647 shift = integerPartWidth - valueBits % integerPartWidth;
2649 /* The natural number of digits required ignoring trailing
2650 insignificant zeroes. */
2651 outputDigits = (valueBits - significandLSB () + 3) / 4;
2653 /* hexDigits of zero means use the required number for the
2654 precision. Otherwise, see if we are truncating. If we are,
2655 find out if we need to round away from zero. */
2657 if (hexDigits < outputDigits) {
2658 /* We are dropping non-zero bits, so need to check how to round.
2659 "bits" is the number of dropped bits. */
2661 lostFraction fraction;
2663 bits = valueBits - hexDigits * 4;
2664 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2665 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2667 outputDigits = hexDigits;
2670 /* Write the digits consecutively, and start writing in the location
2671 of the hexadecimal point. We move the most significant digit
2672 left and add the hexadecimal point later. */
2675 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2677 while (outputDigits && count) {
2680 /* Put the most significant integerPartWidth bits in "part". */
2681 if (--count == partsCount)
2682 part = 0; /* An imaginary higher zero part. */
2684 part = significand[count] << shift;
2687 part |= significand[count - 1] >> (integerPartWidth - shift);
2689 /* Convert as much of "part" to hexdigits as we can. */
2690 unsigned int curDigits = integerPartWidth / 4;
2692 if (curDigits > outputDigits)
2693 curDigits = outputDigits;
2694 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2695 outputDigits -= curDigits;
2701 /* Note that hexDigitChars has a trailing '0'. */
2704 *q = hexDigitChars[hexDigitValue (*q) + 1];
2705 } while (*q == '0');
2708 /* Add trailing zeroes. */
2709 memset (dst, '0', outputDigits);
2710 dst += outputDigits;
2713 /* Move the most significant digit to before the point, and if there
2714 is something after the decimal point add it. This must come
2715 after rounding above. */
2722 /* Finally output the exponent. */
2723 *dst++ = upperCase ? 'P': 'p';
2725 return writeSignedDecimal (dst, exponent);
2728 hash_code llvm::hash_value(const APFloat &Arg) {
2729 if (Arg.category != APFloat::fcNormal)
2730 return hash_combine((uint8_t)Arg.category,
2731 // NaN has no sign, fix it at zero.
2732 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2733 Arg.semantics->precision);
2735 // Normal floats need their exponent and significand hashed.
2736 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2737 Arg.semantics->precision, Arg.exponent,
2739 Arg.significandParts(),
2740 Arg.significandParts() + Arg.partCount()));
2743 // Conversion from APFloat to/from host float/double. It may eventually be
2744 // possible to eliminate these and have everybody deal with APFloats, but that
2745 // will take a while. This approach will not easily extend to long double.
2746 // Current implementation requires integerPartWidth==64, which is correct at
2747 // the moment but could be made more general.
2749 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2750 // the actual IEEE respresentations. We compensate for that here.
2753 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2755 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2756 assert(partCount()==2);
2758 uint64_t myexponent, mysignificand;
2760 if (category==fcNormal) {
2761 myexponent = exponent+16383; //bias
2762 mysignificand = significandParts()[0];
2763 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2764 myexponent = 0; // denormal
2765 } else if (category==fcZero) {
2768 } else if (category==fcInfinity) {
2769 myexponent = 0x7fff;
2770 mysignificand = 0x8000000000000000ULL;
2772 assert(category == fcNaN && "Unknown category");
2773 myexponent = 0x7fff;
2774 mysignificand = significandParts()[0];
2778 words[0] = mysignificand;
2779 words[1] = ((uint64_t)(sign & 1) << 15) |
2780 (myexponent & 0x7fffLL);
2781 return APInt(80, words);
2785 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2787 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2788 assert(partCount()==2);
2794 // Convert number to double. To avoid spurious underflows, we re-
2795 // normalize against the "double" minExponent first, and only *then*
2796 // truncate the mantissa. The result of that second conversion
2797 // may be inexact, but should never underflow.
2798 APFloat extended(*this);
2799 fltSemantics extendedSemantics = *semantics;
2800 extendedSemantics.minExponent = IEEEdouble.minExponent;
2801 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2802 assert(fs == opOK && !losesInfo);
2805 APFloat u(extended);
2806 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2807 assert(fs == opOK || fs == opInexact);
2809 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2811 // If conversion was exact or resulted in a special case, we're done;
2812 // just set the second double to zero. Otherwise, re-convert back to
2813 // the extended format and compute the difference. This now should
2814 // convert exactly to double.
2815 if (u.category == fcNormal && losesInfo) {
2816 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2817 assert(fs == opOK && !losesInfo);
2820 APFloat v(extended);
2821 v.subtract(u, rmNearestTiesToEven);
2822 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2823 assert(fs == opOK && !losesInfo);
2825 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2830 return APInt(128, words);
2834 APFloat::convertQuadrupleAPFloatToAPInt() const
2836 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2837 assert(partCount()==2);
2839 uint64_t myexponent, mysignificand, mysignificand2;
2841 if (category==fcNormal) {
2842 myexponent = exponent+16383; //bias
2843 mysignificand = significandParts()[0];
2844 mysignificand2 = significandParts()[1];
2845 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2846 myexponent = 0; // denormal
2847 } else if (category==fcZero) {
2849 mysignificand = mysignificand2 = 0;
2850 } else if (category==fcInfinity) {
2851 myexponent = 0x7fff;
2852 mysignificand = mysignificand2 = 0;
2854 assert(category == fcNaN && "Unknown category!");
2855 myexponent = 0x7fff;
2856 mysignificand = significandParts()[0];
2857 mysignificand2 = significandParts()[1];
2861 words[0] = mysignificand;
2862 words[1] = ((uint64_t)(sign & 1) << 63) |
2863 ((myexponent & 0x7fff) << 48) |
2864 (mysignificand2 & 0xffffffffffffLL);
2866 return APInt(128, words);
2870 APFloat::convertDoubleAPFloatToAPInt() const
2872 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2873 assert(partCount()==1);
2875 uint64_t myexponent, mysignificand;
2877 if (category==fcNormal) {
2878 myexponent = exponent+1023; //bias
2879 mysignificand = *significandParts();
2880 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2881 myexponent = 0; // denormal
2882 } else if (category==fcZero) {
2885 } else if (category==fcInfinity) {
2889 assert(category == fcNaN && "Unknown category!");
2891 mysignificand = *significandParts();
2894 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2895 ((myexponent & 0x7ff) << 52) |
2896 (mysignificand & 0xfffffffffffffLL))));
2900 APFloat::convertFloatAPFloatToAPInt() const
2902 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2903 assert(partCount()==1);
2905 uint32_t myexponent, mysignificand;
2907 if (category==fcNormal) {
2908 myexponent = exponent+127; //bias
2909 mysignificand = (uint32_t)*significandParts();
2910 if (myexponent == 1 && !(mysignificand & 0x800000))
2911 myexponent = 0; // denormal
2912 } else if (category==fcZero) {
2915 } else if (category==fcInfinity) {
2919 assert(category == fcNaN && "Unknown category!");
2921 mysignificand = (uint32_t)*significandParts();
2924 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2925 (mysignificand & 0x7fffff)));
2929 APFloat::convertHalfAPFloatToAPInt() const
2931 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2932 assert(partCount()==1);
2934 uint32_t myexponent, mysignificand;
2936 if (category==fcNormal) {
2937 myexponent = exponent+15; //bias
2938 mysignificand = (uint32_t)*significandParts();
2939 if (myexponent == 1 && !(mysignificand & 0x400))
2940 myexponent = 0; // denormal
2941 } else if (category==fcZero) {
2944 } else if (category==fcInfinity) {
2948 assert(category == fcNaN && "Unknown category!");
2950 mysignificand = (uint32_t)*significandParts();
2953 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2954 (mysignificand & 0x3ff)));
2957 // This function creates an APInt that is just a bit map of the floating
2958 // point constant as it would appear in memory. It is not a conversion,
2959 // and treating the result as a normal integer is unlikely to be useful.
2962 APFloat::bitcastToAPInt() const
2964 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2965 return convertHalfAPFloatToAPInt();
2967 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2968 return convertFloatAPFloatToAPInt();
2970 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2971 return convertDoubleAPFloatToAPInt();
2973 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2974 return convertQuadrupleAPFloatToAPInt();
2976 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2977 return convertPPCDoubleDoubleAPFloatToAPInt();
2979 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2981 return convertF80LongDoubleAPFloatToAPInt();
2985 APFloat::convertToFloat() const
2987 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2988 "Float semantics are not IEEEsingle");
2989 APInt api = bitcastToAPInt();
2990 return api.bitsToFloat();
2994 APFloat::convertToDouble() const
2996 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2997 "Float semantics are not IEEEdouble");
2998 APInt api = bitcastToAPInt();
2999 return api.bitsToDouble();
3002 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3003 /// does not support these bit patterns:
3004 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3005 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3006 /// exponent = 0, integer bit 1 ("pseudodenormal")
3007 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3008 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3010 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3012 assert(api.getBitWidth()==80);
3013 uint64_t i1 = api.getRawData()[0];
3014 uint64_t i2 = api.getRawData()[1];
3015 uint64_t myexponent = (i2 & 0x7fff);
3016 uint64_t mysignificand = i1;
3018 initialize(&APFloat::x87DoubleExtended);
3019 assert(partCount()==2);
3021 sign = static_cast<unsigned int>(i2>>15);
3022 if (myexponent==0 && mysignificand==0) {
3023 // exponent, significand meaningless
3025 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3026 // exponent, significand meaningless
3027 category = fcInfinity;
3028 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3029 // exponent meaningless
3031 significandParts()[0] = mysignificand;
3032 significandParts()[1] = 0;
3034 category = fcNormal;
3035 exponent = myexponent - 16383;
3036 significandParts()[0] = mysignificand;
3037 significandParts()[1] = 0;
3038 if (myexponent==0) // denormal
3044 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3046 assert(api.getBitWidth()==128);
3047 uint64_t i1 = api.getRawData()[0];
3048 uint64_t i2 = api.getRawData()[1];
3052 // Get the first double and convert to our format.
3053 initFromDoubleAPInt(APInt(64, i1));
3054 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3055 assert(fs == opOK && !losesInfo);
3058 // Unless we have a special case, add in second double.
3059 if (category == fcNormal) {
3060 APFloat v(APInt(64, i2));
3061 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3062 assert(fs == opOK && !losesInfo);
3065 add(v, rmNearestTiesToEven);
3070 APFloat::initFromQuadrupleAPInt(const APInt &api)
3072 assert(api.getBitWidth()==128);
3073 uint64_t i1 = api.getRawData()[0];
3074 uint64_t i2 = api.getRawData()[1];
3075 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3076 uint64_t mysignificand = i1;
3077 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3079 initialize(&APFloat::IEEEquad);
3080 assert(partCount()==2);
3082 sign = static_cast<unsigned int>(i2>>63);
3083 if (myexponent==0 &&
3084 (mysignificand==0 && mysignificand2==0)) {
3085 // exponent, significand meaningless
3087 } else if (myexponent==0x7fff &&
3088 (mysignificand==0 && mysignificand2==0)) {
3089 // exponent, significand meaningless
3090 category = fcInfinity;
3091 } else if (myexponent==0x7fff &&
3092 (mysignificand!=0 || mysignificand2 !=0)) {
3093 // exponent meaningless
3095 significandParts()[0] = mysignificand;
3096 significandParts()[1] = mysignificand2;
3098 category = fcNormal;
3099 exponent = myexponent - 16383;
3100 significandParts()[0] = mysignificand;
3101 significandParts()[1] = mysignificand2;
3102 if (myexponent==0) // denormal
3105 significandParts()[1] |= 0x1000000000000LL; // integer bit
3110 APFloat::initFromDoubleAPInt(const APInt &api)
3112 assert(api.getBitWidth()==64);
3113 uint64_t i = *api.getRawData();
3114 uint64_t myexponent = (i >> 52) & 0x7ff;
3115 uint64_t mysignificand = i & 0xfffffffffffffLL;
3117 initialize(&APFloat::IEEEdouble);
3118 assert(partCount()==1);
3120 sign = static_cast<unsigned int>(i>>63);
3121 if (myexponent==0 && mysignificand==0) {
3122 // exponent, significand meaningless
3124 } else if (myexponent==0x7ff && mysignificand==0) {
3125 // exponent, significand meaningless
3126 category = fcInfinity;
3127 } else if (myexponent==0x7ff && mysignificand!=0) {
3128 // exponent meaningless
3130 *significandParts() = mysignificand;
3132 category = fcNormal;
3133 exponent = myexponent - 1023;
3134 *significandParts() = mysignificand;
3135 if (myexponent==0) // denormal
3138 *significandParts() |= 0x10000000000000LL; // integer bit
3143 APFloat::initFromFloatAPInt(const APInt & api)
3145 assert(api.getBitWidth()==32);
3146 uint32_t i = (uint32_t)*api.getRawData();
3147 uint32_t myexponent = (i >> 23) & 0xff;
3148 uint32_t mysignificand = i & 0x7fffff;
3150 initialize(&APFloat::IEEEsingle);
3151 assert(partCount()==1);
3154 if (myexponent==0 && mysignificand==0) {
3155 // exponent, significand meaningless
3157 } else if (myexponent==0xff && mysignificand==0) {
3158 // exponent, significand meaningless
3159 category = fcInfinity;
3160 } else if (myexponent==0xff && mysignificand!=0) {
3161 // sign, exponent, significand meaningless
3163 *significandParts() = mysignificand;
3165 category = fcNormal;
3166 exponent = myexponent - 127; //bias
3167 *significandParts() = mysignificand;
3168 if (myexponent==0) // denormal
3171 *significandParts() |= 0x800000; // integer bit
3176 APFloat::initFromHalfAPInt(const APInt & api)
3178 assert(api.getBitWidth()==16);
3179 uint32_t i = (uint32_t)*api.getRawData();
3180 uint32_t myexponent = (i >> 10) & 0x1f;
3181 uint32_t mysignificand = i & 0x3ff;
3183 initialize(&APFloat::IEEEhalf);
3184 assert(partCount()==1);
3187 if (myexponent==0 && mysignificand==0) {
3188 // exponent, significand meaningless
3190 } else if (myexponent==0x1f && mysignificand==0) {
3191 // exponent, significand meaningless
3192 category = fcInfinity;
3193 } else if (myexponent==0x1f && mysignificand!=0) {
3194 // sign, exponent, significand meaningless
3196 *significandParts() = mysignificand;
3198 category = fcNormal;
3199 exponent = myexponent - 15; //bias
3200 *significandParts() = mysignificand;
3201 if (myexponent==0) // denormal
3204 *significandParts() |= 0x400; // integer bit
3208 /// Treat api as containing the bits of a floating point number. Currently
3209 /// we infer the floating point type from the size of the APInt. The
3210 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3211 /// when the size is anything else).
3213 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3215 if (api.getBitWidth() == 16)
3216 return initFromHalfAPInt(api);
3217 else if (api.getBitWidth() == 32)
3218 return initFromFloatAPInt(api);
3219 else if (api.getBitWidth()==64)
3220 return initFromDoubleAPInt(api);
3221 else if (api.getBitWidth()==80)
3222 return initFromF80LongDoubleAPInt(api);
3223 else if (api.getBitWidth()==128)
3225 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3227 llvm_unreachable(0);
3231 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3233 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
3236 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3237 APFloat Val(Sem, fcNormal, Negative);
3239 // We want (in interchange format):
3240 // sign = {Negative}
3242 // significand = 1..1
3244 Val.exponent = Sem.maxExponent; // unbiased
3246 // 1-initialize all bits....
3247 Val.zeroSignificand();
3248 integerPart *significand = Val.significandParts();
3249 unsigned N = partCountForBits(Sem.precision);
3250 for (unsigned i = 0; i != N; ++i)
3251 significand[i] = ~((integerPart) 0);
3253 // ...and then clear the top bits for internal consistency.
3254 if (Sem.precision % integerPartWidth != 0)
3256 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3261 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3262 APFloat Val(Sem, fcNormal, Negative);
3264 // We want (in interchange format):
3265 // sign = {Negative}
3267 // significand = 0..01
3269 Val.exponent = Sem.minExponent; // unbiased
3270 Val.zeroSignificand();
3271 Val.significandParts()[0] = 1;
3275 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3276 APFloat Val(Sem, fcNormal, Negative);
3278 // We want (in interchange format):
3279 // sign = {Negative}
3281 // significand = 10..0
3283 Val.exponent = Sem.minExponent;
3284 Val.zeroSignificand();
3285 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3286 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3291 APFloat::APFloat(const APInt& api, bool isIEEE) {
3292 initFromAPInt(api, isIEEE);
3295 APFloat::APFloat(float f) {
3296 initFromAPInt(APInt::floatToBits(f));
3299 APFloat::APFloat(double d) {
3300 initFromAPInt(APInt::doubleToBits(d));
3304 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3305 Buffer.append(Str.begin(), Str.end());
3308 /// Removes data from the given significand until it is no more
3309 /// precise than is required for the desired precision.
3310 void AdjustToPrecision(APInt &significand,
3311 int &exp, unsigned FormatPrecision) {
3312 unsigned bits = significand.getActiveBits();
3314 // 196/59 is a very slight overestimate of lg_2(10).
3315 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3317 if (bits <= bitsRequired) return;
3319 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3320 if (!tensRemovable) return;
3322 exp += tensRemovable;
3324 APInt divisor(significand.getBitWidth(), 1);
3325 APInt powten(significand.getBitWidth(), 10);
3327 if (tensRemovable & 1)
3329 tensRemovable >>= 1;
3330 if (!tensRemovable) break;
3334 significand = significand.udiv(divisor);
3336 // Truncate the significand down to its active bit count, but
3337 // don't try to drop below 32.
3338 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3339 significand = significand.trunc(newPrecision);
3343 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3344 int &exp, unsigned FormatPrecision) {
3345 unsigned N = buffer.size();
3346 if (N <= FormatPrecision) return;
3348 // The most significant figures are the last ones in the buffer.
3349 unsigned FirstSignificant = N - FormatPrecision;
3352 // FIXME: this probably shouldn't use 'round half up'.
3354 // Rounding down is just a truncation, except we also want to drop
3355 // trailing zeros from the new result.
3356 if (buffer[FirstSignificant - 1] < '5') {
3357 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3360 exp += FirstSignificant;
3361 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3365 // Rounding up requires a decimal add-with-carry. If we continue
3366 // the carry, the newly-introduced zeros will just be truncated.
3367 for (unsigned I = FirstSignificant; I != N; ++I) {
3368 if (buffer[I] == '9') {
3376 // If we carried through, we have exactly one digit of precision.
3377 if (FirstSignificant == N) {
3378 exp += FirstSignificant;
3380 buffer.push_back('1');
3384 exp += FirstSignificant;
3385 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3389 void APFloat::toString(SmallVectorImpl<char> &Str,
3390 unsigned FormatPrecision,
3391 unsigned FormatMaxPadding) const {
3395 return append(Str, "-Inf");
3397 return append(Str, "+Inf");
3399 case fcNaN: return append(Str, "NaN");
3405 if (!FormatMaxPadding)
3406 append(Str, "0.0E+0");
3418 // Decompose the number into an APInt and an exponent.
3419 int exp = exponent - ((int) semantics->precision - 1);
3420 APInt significand(semantics->precision,
3421 makeArrayRef(significandParts(),
3422 partCountForBits(semantics->precision)));
3424 // Set FormatPrecision if zero. We want to do this before we
3425 // truncate trailing zeros, as those are part of the precision.
3426 if (!FormatPrecision) {
3427 // It's an interesting question whether to use the nominal
3428 // precision or the active precision here for denormals.
3430 // FormatPrecision = ceil(significandBits / lg_2(10))
3431 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3434 // Ignore trailing binary zeros.
3435 int trailingZeros = significand.countTrailingZeros();
3436 exp += trailingZeros;
3437 significand = significand.lshr(trailingZeros);
3439 // Change the exponent from 2^e to 10^e.
3442 } else if (exp > 0) {
3444 significand = significand.zext(semantics->precision + exp);
3445 significand <<= exp;
3447 } else { /* exp < 0 */
3450 // We transform this using the identity:
3451 // (N)(2^-e) == (N)(5^e)(10^-e)
3452 // This means we have to multiply N (the significand) by 5^e.
3453 // To avoid overflow, we have to operate on numbers large
3454 // enough to store N * 5^e:
3455 // log2(N * 5^e) == log2(N) + e * log2(5)
3456 // <= semantics->precision + e * 137 / 59
3457 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3459 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3461 // Multiply significand by 5^e.
3462 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3463 significand = significand.zext(precision);
3464 APInt five_to_the_i(precision, 5);
3466 if (texp & 1) significand *= five_to_the_i;
3470 five_to_the_i *= five_to_the_i;
3474 AdjustToPrecision(significand, exp, FormatPrecision);
3476 llvm::SmallVector<char, 256> buffer;
3479 unsigned precision = significand.getBitWidth();
3480 APInt ten(precision, 10);
3481 APInt digit(precision, 0);
3483 bool inTrail = true;
3484 while (significand != 0) {
3485 // digit <- significand % 10
3486 // significand <- significand / 10
3487 APInt::udivrem(significand, ten, significand, digit);
3489 unsigned d = digit.getZExtValue();
3491 // Drop trailing zeros.
3492 if (inTrail && !d) exp++;
3494 buffer.push_back((char) ('0' + d));
3499 assert(!buffer.empty() && "no characters in buffer!");
3501 // Drop down to FormatPrecision.
3502 // TODO: don't do more precise calculations above than are required.
3503 AdjustToPrecision(buffer, exp, FormatPrecision);
3505 unsigned NDigits = buffer.size();
3507 // Check whether we should use scientific notation.
3508 bool FormatScientific;
3509 if (!FormatMaxPadding)
3510 FormatScientific = true;
3515 // But we shouldn't make the number look more precise than it is.
3516 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3517 NDigits + (unsigned) exp > FormatPrecision);
3519 // Power of the most significant digit.
3520 int MSD = exp + (int) (NDigits - 1);
3523 FormatScientific = false;
3525 // 765e-5 == 0.00765
3527 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3532 // Scientific formatting is pretty straightforward.
3533 if (FormatScientific) {
3534 exp += (NDigits - 1);
3536 Str.push_back(buffer[NDigits-1]);
3541 for (unsigned I = 1; I != NDigits; ++I)
3542 Str.push_back(buffer[NDigits-1-I]);
3545 Str.push_back(exp >= 0 ? '+' : '-');
3546 if (exp < 0) exp = -exp;
3547 SmallVector<char, 6> expbuf;
3549 expbuf.push_back((char) ('0' + (exp % 10)));
3552 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3553 Str.push_back(expbuf[E-1-I]);
3557 // Non-scientific, positive exponents.
3559 for (unsigned I = 0; I != NDigits; ++I)
3560 Str.push_back(buffer[NDigits-1-I]);
3561 for (unsigned I = 0; I != (unsigned) exp; ++I)
3566 // Non-scientific, negative exponents.
3568 // The number of digits to the left of the decimal point.
3569 int NWholeDigits = exp + (int) NDigits;
3572 if (NWholeDigits > 0) {
3573 for (; I != (unsigned) NWholeDigits; ++I)
3574 Str.push_back(buffer[NDigits-I-1]);
3577 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3581 for (unsigned Z = 1; Z != NZeros; ++Z)
3585 for (; I != NDigits; ++I)
3586 Str.push_back(buffer[NDigits-I-1]);
3589 bool APFloat::getExactInverse(APFloat *inv) const {
3590 // We can only guarantee the existence of an exact inverse for IEEE floats.
3591 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3592 semantics != &IEEEdouble && semantics != &IEEEquad)
3595 // Special floats and denormals have no exact inverse.
3596 if (category != fcNormal)
3599 // Check that the number is a power of two by making sure that only the
3600 // integer bit is set in the significand.
3601 if (significandLSB() != semantics->precision - 1)
3605 APFloat reciprocal(*semantics, 1ULL);
3606 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3609 // Avoid multiplication with a denormal, it is not safe on all platforms and
3610 // may be slower than a normal division.
3611 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3614 assert(reciprocal.category == fcNormal &&
3615 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);