1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
28 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
30 /* Assumed in hexadecimal significand parsing, and conversion to
31 hexadecimal strings. */
32 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
33 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
37 /* Represents floating point arithmetic semantics. */
39 /* The largest E such that 2^E is representable; this matches the
40 definition of IEEE 754. */
41 exponent_t maxExponent;
43 /* The smallest E such that 2^E is a normalized number; this
44 matches the definition of IEEE 754. */
45 exponent_t minExponent;
47 /* Number of bits in the significand. This includes the integer
49 unsigned int precision;
52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
57 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
59 /* The PowerPC format consists of two doubles. It does not map cleanly
60 onto the usual format above. It is approximated using twice the
61 mantissa bits. Note that for exponents near the double minimum,
62 we no longer can represent the full 106 mantissa bits, so those
63 will be treated as denormal numbers.
65 FIXME: While this approximation is equivalent to what GCC uses for
66 compile-time arithmetic on PPC double-double numbers, it is not able
67 to represent all possible values held by a PPC double-double number,
68 for example: (long double) 1.0 + (long double) 0x1p-106
69 Should this be replaced by a full emulation of PPC double-double? */
70 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
72 /* A tight upper bound on number of parts required to hold the value
75 power * 815 / (351 * integerPartWidth) + 1
77 However, whilst the result may require only this many parts,
78 because we are multiplying two values to get it, the
79 multiplication may require an extra part with the excess part
80 being zero (consider the trivial case of 1 * 1, tcFullMultiply
81 requires two parts to hold the single-part result). So we add an
82 extra one to guarantee enough space whilst multiplying. */
83 const unsigned int maxExponent = 16383;
84 const unsigned int maxPrecision = 113;
85 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
86 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
87 / (351 * integerPartWidth));
90 /* A bunch of private, handy routines. */
92 static inline unsigned int
93 partCountForBits(unsigned int bits)
95 return ((bits) + integerPartWidth - 1) / integerPartWidth;
98 /* Returns 0U-9U. Return values >= 10U are not digits. */
99 static inline unsigned int
100 decDigitValue(unsigned int c)
105 /* Return the value of a decimal exponent of the form
108 If the exponent overflows, returns a large exponent with the
111 readExponent(StringRef::iterator begin, StringRef::iterator end)
114 unsigned int absExponent;
115 const unsigned int overlargeExponent = 24000; /* FIXME. */
116 StringRef::iterator p = begin;
118 assert(p != end && "Exponent has no digits");
120 isNegative = (*p == '-');
121 if (*p == '-' || *p == '+') {
123 assert(p != end && "Exponent has no digits");
126 absExponent = decDigitValue(*p++);
127 assert(absExponent < 10U && "Invalid character in exponent");
129 for (; p != end; ++p) {
132 value = decDigitValue(*p);
133 assert(value < 10U && "Invalid character in exponent");
135 value += absExponent * 10;
136 if (absExponent >= overlargeExponent) {
137 absExponent = overlargeExponent;
138 p = end; /* outwit assert below */
144 assert(p == end && "Invalid exponent in exponent");
147 return -(int) absExponent;
149 return (int) absExponent;
152 /* This is ugly and needs cleaning up, but I don't immediately see
153 how whilst remaining safe. */
155 totalExponent(StringRef::iterator p, StringRef::iterator end,
156 int exponentAdjustment)
158 int unsignedExponent;
159 bool negative, overflow;
162 assert(p != end && "Exponent has no digits");
164 negative = *p == '-';
165 if (*p == '-' || *p == '+') {
167 assert(p != end && "Exponent has no digits");
170 unsignedExponent = 0;
172 for (; p != end; ++p) {
175 value = decDigitValue(*p);
176 assert(value < 10U && "Invalid character in exponent");
178 unsignedExponent = unsignedExponent * 10 + value;
179 if (unsignedExponent > 32767) {
185 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
189 exponent = unsignedExponent;
191 exponent = -exponent;
192 exponent += exponentAdjustment;
193 if (exponent > 32767 || exponent < -32768)
198 exponent = negative ? -32768: 32767;
203 static StringRef::iterator
204 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
205 StringRef::iterator *dot)
207 StringRef::iterator p = begin;
209 while (*p == '0' && p != end)
215 assert(end - begin != 1 && "Significand has no digits");
217 while (*p == '0' && p != end)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
234 If the value is zero, V->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
238 const char *firstSigDigit;
239 const char *lastSigDigit;
241 int normalizedExponent;
245 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
248 StringRef::iterator dot = end;
249 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
251 D->firstSigDigit = p;
253 D->normalizedExponent = 0;
255 for (; p != end; ++p) {
257 assert(dot == end && "String contains multiple dots");
262 if (decDigitValue(*p) >= 10U)
267 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
268 assert(p != begin && "Significand has no digits");
269 assert((dot == end || p - begin != 1) && "Significand has no digits");
271 /* p points to the first non-digit in the string */
272 D->exponent = readExponent(p + 1, end);
274 /* Implied decimal point? */
279 /* If number is all zeroes accept any exponent. */
280 if (p != D->firstSigDigit) {
281 /* Drop insignificant trailing zeroes. */
286 while (p != begin && *p == '0');
287 while (p != begin && *p == '.');
290 /* Adjust the exponents for any decimal point. */
291 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
292 D->normalizedExponent = (D->exponent +
293 static_cast<exponent_t>((p - D->firstSigDigit)
294 - (dot > D->firstSigDigit && dot < p)));
300 /* Return the trailing fraction of a hexadecimal number.
301 DIGITVALUE is the first hex digit of the fraction, P points to
304 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
305 unsigned int digitValue)
307 unsigned int hexDigit;
309 /* If the first trailing digit isn't 0 or 8 we can work out the
310 fraction immediately. */
312 return lfMoreThanHalf;
313 else if (digitValue < 8 && digitValue > 0)
314 return lfLessThanHalf;
316 /* Otherwise we need to find the first non-zero digit. */
320 assert(p != end && "Invalid trailing hexadecimal fraction!");
322 hexDigit = hexDigitValue(*p);
324 /* If we ran off the end it is exactly zero or one-half, otherwise
327 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
329 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
332 /* Return the fraction lost were a bignum truncated losing the least
333 significant BITS bits. */
335 lostFractionThroughTruncation(const integerPart *parts,
336 unsigned int partCount,
341 lsb = APInt::tcLSB(parts, partCount);
343 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
345 return lfExactlyZero;
347 return lfExactlyHalf;
348 if (bits <= partCount * integerPartWidth &&
349 APInt::tcExtractBit(parts, bits - 1))
350 return lfMoreThanHalf;
352 return lfLessThanHalf;
355 /* Shift DST right BITS bits noting lost fraction. */
357 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
359 lostFraction lost_fraction;
361 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
363 APInt::tcShiftRight(dst, parts, bits);
365 return lost_fraction;
368 /* Combine the effect of two lost fractions. */
370 combineLostFractions(lostFraction moreSignificant,
371 lostFraction lessSignificant)
373 if (lessSignificant != lfExactlyZero) {
374 if (moreSignificant == lfExactlyZero)
375 moreSignificant = lfLessThanHalf;
376 else if (moreSignificant == lfExactlyHalf)
377 moreSignificant = lfMoreThanHalf;
380 return moreSignificant;
383 /* The error from the true value, in half-ulps, on multiplying two
384 floating point numbers, which differ from the value they
385 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
386 than the returned value.
388 See "How to Read Floating Point Numbers Accurately" by William D
391 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
393 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
395 if (HUerr1 + HUerr2 == 0)
396 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
398 return inexactMultiply + 2 * (HUerr1 + HUerr2);
401 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
402 when the least significant BITS are truncated. BITS cannot be
405 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
407 unsigned int count, partBits;
408 integerPart part, boundary;
413 count = bits / integerPartWidth;
414 partBits = bits % integerPartWidth + 1;
416 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
419 boundary = (integerPart) 1 << (partBits - 1);
424 if (part - boundary <= boundary - part)
425 return part - boundary;
427 return boundary - part;
430 if (part == boundary) {
433 return ~(integerPart) 0; /* A lot. */
436 } else if (part == boundary - 1) {
439 return ~(integerPart) 0; /* A lot. */
444 return ~(integerPart) 0; /* A lot. */
447 /* Place pow(5, power) in DST, and return the number of parts used.
448 DST must be at least one part larger than size of the answer. */
450 powerOf5(integerPart *dst, unsigned int power)
452 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
454 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
455 pow5s[0] = 78125 * 5;
457 unsigned int partsCount[16] = { 1 };
458 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
460 assert(power <= maxExponent);
465 *p1 = firstEightPowers[power & 7];
471 for (unsigned int n = 0; power; power >>= 1, n++) {
476 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
478 pc = partsCount[n - 1];
479 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
481 if (pow5[pc - 1] == 0)
489 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
491 if (p2[result - 1] == 0)
494 /* Now result is in p1 with partsCount parts and p2 is scratch
496 tmp = p1, p1 = p2, p2 = tmp;
503 APInt::tcAssign(dst, p1, result);
508 /* Zero at the end to avoid modular arithmetic when adding one; used
509 when rounding up during hexadecimal output. */
510 static const char hexDigitsLower[] = "0123456789abcdef0";
511 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
512 static const char infinityL[] = "infinity";
513 static const char infinityU[] = "INFINITY";
514 static const char NaNL[] = "nan";
515 static const char NaNU[] = "NAN";
517 /* Write out an integerPart in hexadecimal, starting with the most
518 significant nibble. Write out exactly COUNT hexdigits, return
521 partAsHex (char *dst, integerPart part, unsigned int count,
522 const char *hexDigitChars)
524 unsigned int result = count;
526 assert(count != 0 && count <= integerPartWidth / 4);
528 part >>= (integerPartWidth - 4 * count);
530 dst[count] = hexDigitChars[part & 0xf];
537 /* Write out an unsigned decimal integer. */
539 writeUnsignedDecimal (char *dst, unsigned int n)
555 /* Write out a signed decimal integer. */
557 writeSignedDecimal (char *dst, int value)
561 dst = writeUnsignedDecimal(dst, -(unsigned) value);
563 dst = writeUnsignedDecimal(dst, value);
570 APFloat::initialize(const fltSemantics *ourSemantics)
574 semantics = ourSemantics;
577 significand.parts = new integerPart[count];
581 APFloat::freeSignificand()
584 delete [] significand.parts;
588 APFloat::assign(const APFloat &rhs)
590 assert(semantics == rhs.semantics);
593 category = rhs.category;
594 exponent = rhs.exponent;
595 if (category == fcNormal || category == fcNaN)
596 copySignificand(rhs);
600 APFloat::copySignificand(const APFloat &rhs)
602 assert(category == fcNormal || category == fcNaN);
603 assert(rhs.partCount() >= partCount());
605 APInt::tcAssign(significandParts(), rhs.significandParts(),
609 /* Make this number a NaN, with an arbitrary but deterministic value
610 for the significand. If double or longer, this is a signalling NaN,
611 which may not be ideal. If float, this is QNaN(0). */
612 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
617 integerPart *significand = significandParts();
618 unsigned numParts = partCount();
620 // Set the significand bits to the fill.
621 if (!fill || fill->getNumWords() < numParts)
622 APInt::tcSet(significand, 0, numParts);
624 APInt::tcAssign(significand, fill->getRawData(),
625 std::min(fill->getNumWords(), numParts));
627 // Zero out the excess bits of the significand.
628 unsigned bitsToPreserve = semantics->precision - 1;
629 unsigned part = bitsToPreserve / 64;
630 bitsToPreserve %= 64;
631 significand[part] &= ((1ULL << bitsToPreserve) - 1);
632 for (part++; part != numParts; ++part)
633 significand[part] = 0;
636 unsigned QNaNBit = semantics->precision - 2;
639 // We always have to clear the QNaN bit to make it an SNaN.
640 APInt::tcClearBit(significand, QNaNBit);
642 // If there are no bits set in the payload, we have to set
643 // *something* to make it a NaN instead of an infinity;
644 // conventionally, this is the next bit down from the QNaN bit.
645 if (APInt::tcIsZero(significand, numParts))
646 APInt::tcSetBit(significand, QNaNBit - 1);
648 // We always have to set the QNaN bit to make it a QNaN.
649 APInt::tcSetBit(significand, QNaNBit);
652 // For x87 extended precision, we want to make a NaN, not a
653 // pseudo-NaN. Maybe we should expose the ability to make
655 if (semantics == &APFloat::x87DoubleExtended)
656 APInt::tcSetBit(significand, QNaNBit + 1);
659 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
661 APFloat value(Sem, uninitialized);
662 value.makeNaN(SNaN, Negative, fill);
667 APFloat::operator=(const APFloat &rhs)
670 if (semantics != rhs.semantics) {
672 initialize(rhs.semantics);
681 APFloat::isDenormal() const {
682 return isNormal() && (exponent == semantics->minExponent) &&
683 (APInt::tcExtractBit(significandParts(),
684 semantics->precision - 1) == 0);
688 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
691 if (semantics != rhs.semantics ||
692 category != rhs.category ||
695 if (category==fcZero || category==fcInfinity)
697 else if (category==fcNormal && exponent!=rhs.exponent)
701 const integerPart* p=significandParts();
702 const integerPart* q=rhs.significandParts();
703 for (; i>0; i--, p++, q++) {
711 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
712 initialize(&ourSemantics);
715 exponent = ourSemantics.precision - 1;
716 significandParts()[0] = value;
717 normalize(rmNearestTiesToEven, lfExactlyZero);
720 APFloat::APFloat(const fltSemantics &ourSemantics) {
721 initialize(&ourSemantics);
726 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
727 // Allocates storage if necessary but does not initialize it.
728 initialize(&ourSemantics);
731 APFloat::APFloat(const fltSemantics &ourSemantics,
732 fltCategory ourCategory, bool negative) {
733 initialize(&ourSemantics);
734 category = ourCategory;
736 if (category == fcNormal)
738 else if (ourCategory == fcNaN)
742 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
743 initialize(&ourSemantics);
744 convertFromString(text, rmNearestTiesToEven);
747 APFloat::APFloat(const APFloat &rhs) {
748 initialize(rhs.semantics);
757 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
758 void APFloat::Profile(FoldingSetNodeID& ID) const {
759 ID.Add(bitcastToAPInt());
763 APFloat::partCount() const
765 return partCountForBits(semantics->precision + 1);
769 APFloat::semanticsPrecision(const fltSemantics &semantics)
771 return semantics.precision;
775 APFloat::significandParts() const
777 return const_cast<APFloat *>(this)->significandParts();
781 APFloat::significandParts()
783 assert(category == fcNormal || category == fcNaN);
786 return significand.parts;
788 return &significand.part;
792 APFloat::zeroSignificand()
795 APInt::tcSet(significandParts(), 0, partCount());
798 /* Increment an fcNormal floating point number's significand. */
800 APFloat::incrementSignificand()
804 carry = APInt::tcIncrement(significandParts(), partCount());
806 /* Our callers should never cause us to overflow. */
811 /* Add the significand of the RHS. Returns the carry flag. */
813 APFloat::addSignificand(const APFloat &rhs)
817 parts = significandParts();
819 assert(semantics == rhs.semantics);
820 assert(exponent == rhs.exponent);
822 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
825 /* Subtract the significand of the RHS with a borrow flag. Returns
828 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
832 parts = significandParts();
834 assert(semantics == rhs.semantics);
835 assert(exponent == rhs.exponent);
837 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
841 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
842 on to the full-precision result of the multiplication. Returns the
845 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
847 unsigned int omsb; // One, not zero, based MSB.
848 unsigned int partsCount, newPartsCount, precision;
849 integerPart *lhsSignificand;
850 integerPart scratch[4];
851 integerPart *fullSignificand;
852 lostFraction lost_fraction;
855 assert(semantics == rhs.semantics);
857 precision = semantics->precision;
858 newPartsCount = partCountForBits(precision * 2);
860 if (newPartsCount > 4)
861 fullSignificand = new integerPart[newPartsCount];
863 fullSignificand = scratch;
865 lhsSignificand = significandParts();
866 partsCount = partCount();
868 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
869 rhs.significandParts(), partsCount, partsCount);
871 lost_fraction = lfExactlyZero;
872 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
873 exponent += rhs.exponent;
875 // Assume the operands involved in the multiplication are single-precision
876 // FP, and the two multiplicants are:
877 // *this = a23 . a22 ... a0 * 2^e1
878 // rhs = b23 . b22 ... b0 * 2^e2
879 // the result of multiplication is:
880 // *this = c47 c46 . c45 ... c0 * 2^(e1+e2)
881 // Note that there are two significant bits at the left-hand side of the
882 // radix point. Move the radix point toward left by one bit, and adjust
883 // exponent accordingly.
887 // The intermediate result of the multiplication has "2 * precision"
888 // signicant bit; adjust the addend to be consistent with mul result.
890 Significand savedSignificand = significand;
891 const fltSemantics *savedSemantics = semantics;
892 fltSemantics extendedSemantics;
894 unsigned int extendedPrecision;
896 /* Normalize our MSB. */
897 extendedPrecision = 2 * precision;
898 if (omsb != extendedPrecision) {
899 assert(extendedPrecision > omsb);
900 APInt::tcShiftLeft(fullSignificand, newPartsCount,
901 extendedPrecision - omsb);
902 exponent -= extendedPrecision - omsb;
905 /* Create new semantics. */
906 extendedSemantics = *semantics;
907 extendedSemantics.precision = extendedPrecision;
909 if (newPartsCount == 1)
910 significand.part = fullSignificand[0];
912 significand.parts = fullSignificand;
913 semantics = &extendedSemantics;
915 APFloat extendedAddend(*addend);
916 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
917 assert(status == opOK);
919 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
921 /* Restore our state. */
922 if (newPartsCount == 1)
923 fullSignificand[0] = significand.part;
924 significand = savedSignificand;
925 semantics = savedSemantics;
927 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
930 // Convert the result having "2 * precision" significant-bits back to the one
931 // having "precision" significant-bits. First, move the radix point from
932 // poision "2*precision - 1" to "precision - 1". The exponent need to be
933 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
934 exponent -= precision;
936 // In case MSB resides at the left-hand side of radix point, shift the
937 // mantissa right by some amount to make sure the MSB reside right before
938 // the radix point (i.e. "MSB . rest-significant-bits").
940 // Note that the result is not normalized when "omsb < precision". So, the
941 // caller needs to call APFloat::normalize() if normalized value is expected.
942 if (omsb > precision) {
943 unsigned int bits, significantParts;
946 bits = omsb - precision;
947 significantParts = partCountForBits(omsb);
948 lf = shiftRight(fullSignificand, significantParts, bits);
949 lost_fraction = combineLostFractions(lf, lost_fraction);
953 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
955 if (newPartsCount > 4)
956 delete [] fullSignificand;
958 return lost_fraction;
961 /* Multiply the significands of LHS and RHS to DST. */
963 APFloat::divideSignificand(const APFloat &rhs)
965 unsigned int bit, i, partsCount;
966 const integerPart *rhsSignificand;
967 integerPart *lhsSignificand, *dividend, *divisor;
968 integerPart scratch[4];
969 lostFraction lost_fraction;
971 assert(semantics == rhs.semantics);
973 lhsSignificand = significandParts();
974 rhsSignificand = rhs.significandParts();
975 partsCount = partCount();
978 dividend = new integerPart[partsCount * 2];
982 divisor = dividend + partsCount;
984 /* Copy the dividend and divisor as they will be modified in-place. */
985 for (i = 0; i < partsCount; i++) {
986 dividend[i] = lhsSignificand[i];
987 divisor[i] = rhsSignificand[i];
988 lhsSignificand[i] = 0;
991 exponent -= rhs.exponent;
993 unsigned int precision = semantics->precision;
995 /* Normalize the divisor. */
996 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
999 APInt::tcShiftLeft(divisor, partsCount, bit);
1002 /* Normalize the dividend. */
1003 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1006 APInt::tcShiftLeft(dividend, partsCount, bit);
1009 /* Ensure the dividend >= divisor initially for the loop below.
1010 Incidentally, this means that the division loop below is
1011 guaranteed to set the integer bit to one. */
1012 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1014 APInt::tcShiftLeft(dividend, partsCount, 1);
1015 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1018 /* Long division. */
1019 for (bit = precision; bit; bit -= 1) {
1020 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1021 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1022 APInt::tcSetBit(lhsSignificand, bit - 1);
1025 APInt::tcShiftLeft(dividend, partsCount, 1);
1028 /* Figure out the lost fraction. */
1029 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1032 lost_fraction = lfMoreThanHalf;
1034 lost_fraction = lfExactlyHalf;
1035 else if (APInt::tcIsZero(dividend, partsCount))
1036 lost_fraction = lfExactlyZero;
1038 lost_fraction = lfLessThanHalf;
1043 return lost_fraction;
1047 APFloat::significandMSB() const
1049 return APInt::tcMSB(significandParts(), partCount());
1053 APFloat::significandLSB() const
1055 return APInt::tcLSB(significandParts(), partCount());
1058 /* Note that a zero result is NOT normalized to fcZero. */
1060 APFloat::shiftSignificandRight(unsigned int bits)
1062 /* Our exponent should not overflow. */
1063 assert((exponent_t) (exponent + bits) >= exponent);
1067 return shiftRight(significandParts(), partCount(), bits);
1070 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1072 APFloat::shiftSignificandLeft(unsigned int bits)
1074 assert(bits < semantics->precision);
1077 unsigned int partsCount = partCount();
1079 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1082 assert(!APInt::tcIsZero(significandParts(), partsCount));
1087 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1091 assert(semantics == rhs.semantics);
1092 assert(category == fcNormal);
1093 assert(rhs.category == fcNormal);
1095 compare = exponent - rhs.exponent;
1097 /* If exponents are equal, do an unsigned bignum comparison of the
1100 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1104 return cmpGreaterThan;
1105 else if (compare < 0)
1111 /* Handle overflow. Sign is preserved. We either become infinity or
1112 the largest finite number. */
1114 APFloat::handleOverflow(roundingMode rounding_mode)
1117 if (rounding_mode == rmNearestTiesToEven ||
1118 rounding_mode == rmNearestTiesToAway ||
1119 (rounding_mode == rmTowardPositive && !sign) ||
1120 (rounding_mode == rmTowardNegative && sign)) {
1121 category = fcInfinity;
1122 return (opStatus) (opOverflow | opInexact);
1125 /* Otherwise we become the largest finite number. */
1126 category = fcNormal;
1127 exponent = semantics->maxExponent;
1128 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1129 semantics->precision);
1134 /* Returns TRUE if, when truncating the current number, with BIT the
1135 new LSB, with the given lost fraction and rounding mode, the result
1136 would need to be rounded away from zero (i.e., by increasing the
1137 signficand). This routine must work for fcZero of both signs, and
1138 fcNormal numbers. */
1140 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1141 lostFraction lost_fraction,
1142 unsigned int bit) const
1144 /* NaNs and infinities should not have lost fractions. */
1145 assert(category == fcNormal || category == fcZero);
1147 /* Current callers never pass this so we don't handle it. */
1148 assert(lost_fraction != lfExactlyZero);
1150 switch (rounding_mode) {
1151 case rmNearestTiesToAway:
1152 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1154 case rmNearestTiesToEven:
1155 if (lost_fraction == lfMoreThanHalf)
1158 /* Our zeroes don't have a significand to test. */
1159 if (lost_fraction == lfExactlyHalf && category != fcZero)
1160 return APInt::tcExtractBit(significandParts(), bit);
1167 case rmTowardPositive:
1168 return sign == false;
1170 case rmTowardNegative:
1171 return sign == true;
1173 llvm_unreachable("Invalid rounding mode found");
1177 APFloat::normalize(roundingMode rounding_mode,
1178 lostFraction lost_fraction)
1180 unsigned int omsb; /* One, not zero, based MSB. */
1183 if (category != fcNormal)
1186 /* Before rounding normalize the exponent of fcNormal numbers. */
1187 omsb = significandMSB() + 1;
1190 /* OMSB is numbered from 1. We want to place it in the integer
1191 bit numbered PRECISION if possible, with a compensating change in
1193 exponentChange = omsb - semantics->precision;
1195 /* If the resulting exponent is too high, overflow according to
1196 the rounding mode. */
1197 if (exponent + exponentChange > semantics->maxExponent)
1198 return handleOverflow(rounding_mode);
1200 /* Subnormal numbers have exponent minExponent, and their MSB
1201 is forced based on that. */
1202 if (exponent + exponentChange < semantics->minExponent)
1203 exponentChange = semantics->minExponent - exponent;
1205 /* Shifting left is easy as we don't lose precision. */
1206 if (exponentChange < 0) {
1207 assert(lost_fraction == lfExactlyZero);
1209 shiftSignificandLeft(-exponentChange);
1214 if (exponentChange > 0) {
1217 /* Shift right and capture any new lost fraction. */
1218 lf = shiftSignificandRight(exponentChange);
1220 lost_fraction = combineLostFractions(lf, lost_fraction);
1222 /* Keep OMSB up-to-date. */
1223 if (omsb > (unsigned) exponentChange)
1224 omsb -= exponentChange;
1230 /* Now round the number according to rounding_mode given the lost
1233 /* As specified in IEEE 754, since we do not trap we do not report
1234 underflow for exact results. */
1235 if (lost_fraction == lfExactlyZero) {
1236 /* Canonicalize zeroes. */
1243 /* Increment the significand if we're rounding away from zero. */
1244 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1246 exponent = semantics->minExponent;
1248 incrementSignificand();
1249 omsb = significandMSB() + 1;
1251 /* Did the significand increment overflow? */
1252 if (omsb == (unsigned) semantics->precision + 1) {
1253 /* Renormalize by incrementing the exponent and shifting our
1254 significand right one. However if we already have the
1255 maximum exponent we overflow to infinity. */
1256 if (exponent == semantics->maxExponent) {
1257 category = fcInfinity;
1259 return (opStatus) (opOverflow | opInexact);
1262 shiftSignificandRight(1);
1268 /* The normal case - we were and are not denormal, and any
1269 significand increment above didn't overflow. */
1270 if (omsb == semantics->precision)
1273 /* We have a non-zero denormal. */
1274 assert(omsb < semantics->precision);
1276 /* Canonicalize zeroes. */
1280 /* The fcZero case is a denormal that underflowed to zero. */
1281 return (opStatus) (opUnderflow | opInexact);
1285 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1287 switch (convolve(category, rhs.category)) {
1289 llvm_unreachable(0);
1291 case convolve(fcNaN, fcZero):
1292 case convolve(fcNaN, fcNormal):
1293 case convolve(fcNaN, fcInfinity):
1294 case convolve(fcNaN, fcNaN):
1295 case convolve(fcNormal, fcZero):
1296 case convolve(fcInfinity, fcNormal):
1297 case convolve(fcInfinity, fcZero):
1300 case convolve(fcZero, fcNaN):
1301 case convolve(fcNormal, fcNaN):
1302 case convolve(fcInfinity, fcNaN):
1304 copySignificand(rhs);
1307 case convolve(fcNormal, fcInfinity):
1308 case convolve(fcZero, fcInfinity):
1309 category = fcInfinity;
1310 sign = rhs.sign ^ subtract;
1313 case convolve(fcZero, fcNormal):
1315 sign = rhs.sign ^ subtract;
1318 case convolve(fcZero, fcZero):
1319 /* Sign depends on rounding mode; handled by caller. */
1322 case convolve(fcInfinity, fcInfinity):
1323 /* Differently signed infinities can only be validly
1325 if (((sign ^ rhs.sign)!=0) != subtract) {
1332 case convolve(fcNormal, fcNormal):
1337 /* Add or subtract two normal numbers. */
1339 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1342 lostFraction lost_fraction;
1345 /* Determine if the operation on the absolute values is effectively
1346 an addition or subtraction. */
1347 subtract ^= (sign ^ rhs.sign) ? true : false;
1349 /* Are we bigger exponent-wise than the RHS? */
1350 bits = exponent - rhs.exponent;
1352 /* Subtraction is more subtle than one might naively expect. */
1354 APFloat temp_rhs(rhs);
1358 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1359 lost_fraction = lfExactlyZero;
1360 } else if (bits > 0) {
1361 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1362 shiftSignificandLeft(1);
1365 lost_fraction = shiftSignificandRight(-bits - 1);
1366 temp_rhs.shiftSignificandLeft(1);
1371 carry = temp_rhs.subtractSignificand
1372 (*this, lost_fraction != lfExactlyZero);
1373 copySignificand(temp_rhs);
1376 carry = subtractSignificand
1377 (temp_rhs, lost_fraction != lfExactlyZero);
1380 /* Invert the lost fraction - it was on the RHS and
1382 if (lost_fraction == lfLessThanHalf)
1383 lost_fraction = lfMoreThanHalf;
1384 else if (lost_fraction == lfMoreThanHalf)
1385 lost_fraction = lfLessThanHalf;
1387 /* The code above is intended to ensure that no borrow is
1393 APFloat temp_rhs(rhs);
1395 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1396 carry = addSignificand(temp_rhs);
1398 lost_fraction = shiftSignificandRight(-bits);
1399 carry = addSignificand(rhs);
1402 /* We have a guard bit; generating a carry cannot happen. */
1407 return lost_fraction;
1411 APFloat::multiplySpecials(const APFloat &rhs)
1413 switch (convolve(category, rhs.category)) {
1415 llvm_unreachable(0);
1417 case convolve(fcNaN, fcZero):
1418 case convolve(fcNaN, fcNormal):
1419 case convolve(fcNaN, fcInfinity):
1420 case convolve(fcNaN, fcNaN):
1423 case convolve(fcZero, fcNaN):
1424 case convolve(fcNormal, fcNaN):
1425 case convolve(fcInfinity, fcNaN):
1427 copySignificand(rhs);
1430 case convolve(fcNormal, fcInfinity):
1431 case convolve(fcInfinity, fcNormal):
1432 case convolve(fcInfinity, fcInfinity):
1433 category = fcInfinity;
1436 case convolve(fcZero, fcNormal):
1437 case convolve(fcNormal, fcZero):
1438 case convolve(fcZero, fcZero):
1442 case convolve(fcZero, fcInfinity):
1443 case convolve(fcInfinity, fcZero):
1447 case convolve(fcNormal, fcNormal):
1453 APFloat::divideSpecials(const APFloat &rhs)
1455 switch (convolve(category, rhs.category)) {
1457 llvm_unreachable(0);
1459 case convolve(fcNaN, fcZero):
1460 case convolve(fcNaN, fcNormal):
1461 case convolve(fcNaN, fcInfinity):
1462 case convolve(fcNaN, fcNaN):
1463 case convolve(fcInfinity, fcZero):
1464 case convolve(fcInfinity, fcNormal):
1465 case convolve(fcZero, fcInfinity):
1466 case convolve(fcZero, fcNormal):
1469 case convolve(fcZero, fcNaN):
1470 case convolve(fcNormal, fcNaN):
1471 case convolve(fcInfinity, fcNaN):
1473 copySignificand(rhs);
1476 case convolve(fcNormal, fcInfinity):
1480 case convolve(fcNormal, fcZero):
1481 category = fcInfinity;
1484 case convolve(fcInfinity, fcInfinity):
1485 case convolve(fcZero, fcZero):
1489 case convolve(fcNormal, fcNormal):
1495 APFloat::modSpecials(const APFloat &rhs)
1497 switch (convolve(category, rhs.category)) {
1499 llvm_unreachable(0);
1501 case convolve(fcNaN, fcZero):
1502 case convolve(fcNaN, fcNormal):
1503 case convolve(fcNaN, fcInfinity):
1504 case convolve(fcNaN, fcNaN):
1505 case convolve(fcZero, fcInfinity):
1506 case convolve(fcZero, fcNormal):
1507 case convolve(fcNormal, fcInfinity):
1510 case convolve(fcZero, fcNaN):
1511 case convolve(fcNormal, fcNaN):
1512 case convolve(fcInfinity, fcNaN):
1514 copySignificand(rhs);
1517 case convolve(fcNormal, fcZero):
1518 case convolve(fcInfinity, fcZero):
1519 case convolve(fcInfinity, fcNormal):
1520 case convolve(fcInfinity, fcInfinity):
1521 case convolve(fcZero, fcZero):
1525 case convolve(fcNormal, fcNormal):
1532 APFloat::changeSign()
1534 /* Look mummy, this one's easy. */
1539 APFloat::clearSign()
1541 /* So is this one. */
1546 APFloat::copySign(const APFloat &rhs)
1552 /* Normalized addition or subtraction. */
1554 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1559 fs = addOrSubtractSpecials(rhs, subtract);
1561 /* This return code means it was not a simple case. */
1562 if (fs == opDivByZero) {
1563 lostFraction lost_fraction;
1565 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1566 fs = normalize(rounding_mode, lost_fraction);
1568 /* Can only be zero if we lost no fraction. */
1569 assert(category != fcZero || lost_fraction == lfExactlyZero);
1572 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1573 positive zero unless rounding to minus infinity, except that
1574 adding two like-signed zeroes gives that zero. */
1575 if (category == fcZero) {
1576 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1577 sign = (rounding_mode == rmTowardNegative);
1583 /* Normalized addition. */
1585 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1587 return addOrSubtract(rhs, rounding_mode, false);
1590 /* Normalized subtraction. */
1592 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1594 return addOrSubtract(rhs, rounding_mode, true);
1597 /* Normalized multiply. */
1599 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1604 fs = multiplySpecials(rhs);
1606 if (category == fcNormal) {
1607 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1608 fs = normalize(rounding_mode, lost_fraction);
1609 if (lost_fraction != lfExactlyZero)
1610 fs = (opStatus) (fs | opInexact);
1616 /* Normalized divide. */
1618 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1623 fs = divideSpecials(rhs);
1625 if (category == fcNormal) {
1626 lostFraction lost_fraction = divideSignificand(rhs);
1627 fs = normalize(rounding_mode, lost_fraction);
1628 if (lost_fraction != lfExactlyZero)
1629 fs = (opStatus) (fs | opInexact);
1635 /* Normalized remainder. This is not currently correct in all cases. */
1637 APFloat::remainder(const APFloat &rhs)
1641 unsigned int origSign = sign;
1643 fs = V.divide(rhs, rmNearestTiesToEven);
1644 if (fs == opDivByZero)
1647 int parts = partCount();
1648 integerPart *x = new integerPart[parts];
1650 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1651 rmNearestTiesToEven, &ignored);
1652 if (fs==opInvalidOp)
1655 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1656 rmNearestTiesToEven);
1657 assert(fs==opOK); // should always work
1659 fs = V.multiply(rhs, rmNearestTiesToEven);
1660 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1662 fs = subtract(V, rmNearestTiesToEven);
1663 assert(fs==opOK || fs==opInexact); // likewise
1666 sign = origSign; // IEEE754 requires this
1671 /* Normalized llvm frem (C fmod).
1672 This is not currently correct in all cases. */
1674 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1677 fs = modSpecials(rhs);
1679 if (category == fcNormal && rhs.category == fcNormal) {
1681 unsigned int origSign = sign;
1683 fs = V.divide(rhs, rmNearestTiesToEven);
1684 if (fs == opDivByZero)
1687 int parts = partCount();
1688 integerPart *x = new integerPart[parts];
1690 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1691 rmTowardZero, &ignored);
1692 if (fs==opInvalidOp)
1695 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1696 rmNearestTiesToEven);
1697 assert(fs==opOK); // should always work
1699 fs = V.multiply(rhs, rounding_mode);
1700 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1702 fs = subtract(V, rounding_mode);
1703 assert(fs==opOK || fs==opInexact); // likewise
1706 sign = origSign; // IEEE754 requires this
1712 /* Normalized fused-multiply-add. */
1714 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1715 const APFloat &addend,
1716 roundingMode rounding_mode)
1720 /* Post-multiplication sign, before addition. */
1721 sign ^= multiplicand.sign;
1723 /* If and only if all arguments are normal do we need to do an
1724 extended-precision calculation. */
1725 if (category == fcNormal &&
1726 multiplicand.category == fcNormal &&
1727 addend.category == fcNormal) {
1728 lostFraction lost_fraction;
1730 lost_fraction = multiplySignificand(multiplicand, &addend);
1731 fs = normalize(rounding_mode, lost_fraction);
1732 if (lost_fraction != lfExactlyZero)
1733 fs = (opStatus) (fs | opInexact);
1735 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1736 positive zero unless rounding to minus infinity, except that
1737 adding two like-signed zeroes gives that zero. */
1738 if (category == fcZero && sign != addend.sign)
1739 sign = (rounding_mode == rmTowardNegative);
1741 fs = multiplySpecials(multiplicand);
1743 /* FS can only be opOK or opInvalidOp. There is no more work
1744 to do in the latter case. The IEEE-754R standard says it is
1745 implementation-defined in this case whether, if ADDEND is a
1746 quiet NaN, we raise invalid op; this implementation does so.
1748 If we need to do the addition we can do so with normal
1751 fs = addOrSubtract(addend, rounding_mode, false);
1757 /* Rounding-mode corrrect round to integral value. */
1758 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1761 // If the exponent is large enough, we know that this value is already
1762 // integral, and the arithmetic below would potentially cause it to saturate
1763 // to +/-Inf. Bail out early instead.
1764 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1767 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1768 // precision of our format, and then subtract it back off again. The choice
1769 // of rounding modes for the addition/subtraction determines the rounding mode
1770 // for our integral rounding as well.
1771 // NOTE: When the input value is negative, we do subtraction followed by
1772 // addition instead.
1773 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1774 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1775 APFloat MagicConstant(*semantics);
1776 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1777 rmNearestTiesToEven);
1778 MagicConstant.copySign(*this);
1783 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1784 bool inputSign = isNegative();
1786 fs = add(MagicConstant, rounding_mode);
1787 if (fs != opOK && fs != opInexact)
1790 fs = subtract(MagicConstant, rounding_mode);
1792 // Restore the input sign.
1793 if (inputSign != isNegative())
1800 /* Comparison requires normalized numbers. */
1802 APFloat::compare(const APFloat &rhs) const
1806 assert(semantics == rhs.semantics);
1808 switch (convolve(category, rhs.category)) {
1810 llvm_unreachable(0);
1812 case convolve(fcNaN, fcZero):
1813 case convolve(fcNaN, fcNormal):
1814 case convolve(fcNaN, fcInfinity):
1815 case convolve(fcNaN, fcNaN):
1816 case convolve(fcZero, fcNaN):
1817 case convolve(fcNormal, fcNaN):
1818 case convolve(fcInfinity, fcNaN):
1819 return cmpUnordered;
1821 case convolve(fcInfinity, fcNormal):
1822 case convolve(fcInfinity, fcZero):
1823 case convolve(fcNormal, fcZero):
1827 return cmpGreaterThan;
1829 case convolve(fcNormal, fcInfinity):
1830 case convolve(fcZero, fcInfinity):
1831 case convolve(fcZero, fcNormal):
1833 return cmpGreaterThan;
1837 case convolve(fcInfinity, fcInfinity):
1838 if (sign == rhs.sign)
1843 return cmpGreaterThan;
1845 case convolve(fcZero, fcZero):
1848 case convolve(fcNormal, fcNormal):
1852 /* Two normal numbers. Do they have the same sign? */
1853 if (sign != rhs.sign) {
1855 result = cmpLessThan;
1857 result = cmpGreaterThan;
1859 /* Compare absolute values; invert result if negative. */
1860 result = compareAbsoluteValue(rhs);
1863 if (result == cmpLessThan)
1864 result = cmpGreaterThan;
1865 else if (result == cmpGreaterThan)
1866 result = cmpLessThan;
1873 /// APFloat::convert - convert a value of one floating point type to another.
1874 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1875 /// records whether the transformation lost information, i.e. whether
1876 /// converting the result back to the original type will produce the
1877 /// original value (this is almost the same as return value==fsOK, but there
1878 /// are edge cases where this is not so).
1881 APFloat::convert(const fltSemantics &toSemantics,
1882 roundingMode rounding_mode, bool *losesInfo)
1884 lostFraction lostFraction;
1885 unsigned int newPartCount, oldPartCount;
1888 const fltSemantics &fromSemantics = *semantics;
1890 lostFraction = lfExactlyZero;
1891 newPartCount = partCountForBits(toSemantics.precision + 1);
1892 oldPartCount = partCount();
1893 shift = toSemantics.precision - fromSemantics.precision;
1895 bool X86SpecialNan = false;
1896 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1897 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1898 (!(*significandParts() & 0x8000000000000000ULL) ||
1899 !(*significandParts() & 0x4000000000000000ULL))) {
1900 // x86 has some unusual NaNs which cannot be represented in any other
1901 // format; note them here.
1902 X86SpecialNan = true;
1905 // If this is a truncation, perform the shift before we narrow the storage.
1906 if (shift < 0 && (category==fcNormal || category==fcNaN))
1907 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1909 // Fix the storage so it can hold to new value.
1910 if (newPartCount > oldPartCount) {
1911 // The new type requires more storage; make it available.
1912 integerPart *newParts;
1913 newParts = new integerPart[newPartCount];
1914 APInt::tcSet(newParts, 0, newPartCount);
1915 if (category==fcNormal || category==fcNaN)
1916 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1918 significand.parts = newParts;
1919 } else if (newPartCount == 1 && oldPartCount != 1) {
1920 // Switch to built-in storage for a single part.
1921 integerPart newPart = 0;
1922 if (category==fcNormal || category==fcNaN)
1923 newPart = significandParts()[0];
1925 significand.part = newPart;
1928 // Now that we have the right storage, switch the semantics.
1929 semantics = &toSemantics;
1931 // If this is an extension, perform the shift now that the storage is
1933 if (shift > 0 && (category==fcNormal || category==fcNaN))
1934 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1936 if (category == fcNormal) {
1937 fs = normalize(rounding_mode, lostFraction);
1938 *losesInfo = (fs != opOK);
1939 } else if (category == fcNaN) {
1940 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1942 // For x87 extended precision, we want to make a NaN, not a special NaN if
1943 // the input wasn't special either.
1944 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
1945 APInt::tcSetBit(significandParts(), semantics->precision - 1);
1947 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1948 // does not give you back the same bits. This is dubious, and we
1949 // don't currently do it. You're really supposed to get
1950 // an invalid operation signal at runtime, but nobody does that.
1960 /* Convert a floating point number to an integer according to the
1961 rounding mode. If the rounded integer value is out of range this
1962 returns an invalid operation exception and the contents of the
1963 destination parts are unspecified. If the rounded value is in
1964 range but the floating point number is not the exact integer, the C
1965 standard doesn't require an inexact exception to be raised. IEEE
1966 854 does require it so we do that.
1968 Note that for conversions to integer type the C standard requires
1969 round-to-zero to always be used. */
1971 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1973 roundingMode rounding_mode,
1974 bool *isExact) const
1976 lostFraction lost_fraction;
1977 const integerPart *src;
1978 unsigned int dstPartsCount, truncatedBits;
1982 /* Handle the three special cases first. */
1983 if (category == fcInfinity || category == fcNaN)
1986 dstPartsCount = partCountForBits(width);
1988 if (category == fcZero) {
1989 APInt::tcSet(parts, 0, dstPartsCount);
1990 // Negative zero can't be represented as an int.
1995 src = significandParts();
1997 /* Step 1: place our absolute value, with any fraction truncated, in
2000 /* Our absolute value is less than one; truncate everything. */
2001 APInt::tcSet(parts, 0, dstPartsCount);
2002 /* For exponent -1 the integer bit represents .5, look at that.
2003 For smaller exponents leftmost truncated bit is 0. */
2004 truncatedBits = semantics->precision -1U - exponent;
2006 /* We want the most significant (exponent + 1) bits; the rest are
2008 unsigned int bits = exponent + 1U;
2010 /* Hopelessly large in magnitude? */
2014 if (bits < semantics->precision) {
2015 /* We truncate (semantics->precision - bits) bits. */
2016 truncatedBits = semantics->precision - bits;
2017 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2019 /* We want at least as many bits as are available. */
2020 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2021 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2026 /* Step 2: work out any lost fraction, and increment the absolute
2027 value if we would round away from zero. */
2028 if (truncatedBits) {
2029 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2031 if (lost_fraction != lfExactlyZero &&
2032 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2033 if (APInt::tcIncrement(parts, dstPartsCount))
2034 return opInvalidOp; /* Overflow. */
2037 lost_fraction = lfExactlyZero;
2040 /* Step 3: check if we fit in the destination. */
2041 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2045 /* Negative numbers cannot be represented as unsigned. */
2049 /* It takes omsb bits to represent the unsigned integer value.
2050 We lose a bit for the sign, but care is needed as the
2051 maximally negative integer is a special case. */
2052 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2055 /* This case can happen because of rounding. */
2060 APInt::tcNegate (parts, dstPartsCount);
2062 if (omsb >= width + !isSigned)
2066 if (lost_fraction == lfExactlyZero) {
2073 /* Same as convertToSignExtendedInteger, except we provide
2074 deterministic values in case of an invalid operation exception,
2075 namely zero for NaNs and the minimal or maximal value respectively
2076 for underflow or overflow.
2077 The *isExact output tells whether the result is exact, in the sense
2078 that converting it back to the original floating point type produces
2079 the original value. This is almost equivalent to result==opOK,
2080 except for negative zeroes.
2083 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2085 roundingMode rounding_mode, bool *isExact) const
2089 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2092 if (fs == opInvalidOp) {
2093 unsigned int bits, dstPartsCount;
2095 dstPartsCount = partCountForBits(width);
2097 if (category == fcNaN)
2102 bits = width - isSigned;
2104 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2105 if (sign && isSigned)
2106 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2112 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2113 an APSInt, whose initial bit-width and signed-ness are used to determine the
2114 precision of the conversion.
2117 APFloat::convertToInteger(APSInt &result,
2118 roundingMode rounding_mode, bool *isExact) const
2120 unsigned bitWidth = result.getBitWidth();
2121 SmallVector<uint64_t, 4> parts(result.getNumWords());
2122 opStatus status = convertToInteger(
2123 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2124 // Keeps the original signed-ness.
2125 result = APInt(bitWidth, parts);
2129 /* Convert an unsigned integer SRC to a floating point number,
2130 rounding according to ROUNDING_MODE. The sign of the floating
2131 point number is not modified. */
2133 APFloat::convertFromUnsignedParts(const integerPart *src,
2134 unsigned int srcCount,
2135 roundingMode rounding_mode)
2137 unsigned int omsb, precision, dstCount;
2139 lostFraction lost_fraction;
2141 category = fcNormal;
2142 omsb = APInt::tcMSB(src, srcCount) + 1;
2143 dst = significandParts();
2144 dstCount = partCount();
2145 precision = semantics->precision;
2147 /* We want the most significant PRECISION bits of SRC. There may not
2148 be that many; extract what we can. */
2149 if (precision <= omsb) {
2150 exponent = omsb - 1;
2151 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2153 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2155 exponent = precision - 1;
2156 lost_fraction = lfExactlyZero;
2157 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2160 return normalize(rounding_mode, lost_fraction);
2164 APFloat::convertFromAPInt(const APInt &Val,
2166 roundingMode rounding_mode)
2168 unsigned int partCount = Val.getNumWords();
2172 if (isSigned && api.isNegative()) {
2177 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2180 /* Convert a two's complement integer SRC to a floating point number,
2181 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2182 integer is signed, in which case it must be sign-extended. */
2184 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2185 unsigned int srcCount,
2187 roundingMode rounding_mode)
2192 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2195 /* If we're signed and negative negate a copy. */
2197 copy = new integerPart[srcCount];
2198 APInt::tcAssign(copy, src, srcCount);
2199 APInt::tcNegate(copy, srcCount);
2200 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2204 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2210 /* FIXME: should this just take a const APInt reference? */
2212 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2213 unsigned int width, bool isSigned,
2214 roundingMode rounding_mode)
2216 unsigned int partCount = partCountForBits(width);
2217 APInt api = APInt(width, makeArrayRef(parts, partCount));
2220 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2225 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2229 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2231 lostFraction lost_fraction = lfExactlyZero;
2232 integerPart *significand;
2233 unsigned int bitPos, partsCount;
2234 StringRef::iterator dot, firstSignificantDigit;
2238 category = fcNormal;
2240 significand = significandParts();
2241 partsCount = partCount();
2242 bitPos = partsCount * integerPartWidth;
2244 /* Skip leading zeroes and any (hexa)decimal point. */
2245 StringRef::iterator begin = s.begin();
2246 StringRef::iterator end = s.end();
2247 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2248 firstSignificantDigit = p;
2251 integerPart hex_value;
2254 assert(dot == end && "String contains multiple dots");
2261 hex_value = hexDigitValue(*p);
2262 if (hex_value == -1U) {
2271 /* Store the number whilst 4-bit nibbles remain. */
2274 hex_value <<= bitPos % integerPartWidth;
2275 significand[bitPos / integerPartWidth] |= hex_value;
2277 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2278 while (p != end && hexDigitValue(*p) != -1U)
2285 /* Hex floats require an exponent but not a hexadecimal point. */
2286 assert(p != end && "Hex strings require an exponent");
2287 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2288 assert(p != begin && "Significand has no digits");
2289 assert((dot == end || p - begin != 1) && "Significand has no digits");
2291 /* Ignore the exponent if we are zero. */
2292 if (p != firstSignificantDigit) {
2295 /* Implicit hexadecimal point? */
2299 /* Calculate the exponent adjustment implicit in the number of
2300 significant digits. */
2301 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2302 if (expAdjustment < 0)
2304 expAdjustment = expAdjustment * 4 - 1;
2306 /* Adjust for writing the significand starting at the most
2307 significant nibble. */
2308 expAdjustment += semantics->precision;
2309 expAdjustment -= partsCount * integerPartWidth;
2311 /* Adjust for the given exponent. */
2312 exponent = totalExponent(p + 1, end, expAdjustment);
2315 return normalize(rounding_mode, lost_fraction);
2319 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2320 unsigned sigPartCount, int exp,
2321 roundingMode rounding_mode)
2323 unsigned int parts, pow5PartCount;
2324 fltSemantics calcSemantics = { 32767, -32767, 0 };
2325 integerPart pow5Parts[maxPowerOfFiveParts];
2328 isNearest = (rounding_mode == rmNearestTiesToEven ||
2329 rounding_mode == rmNearestTiesToAway);
2331 parts = partCountForBits(semantics->precision + 11);
2333 /* Calculate pow(5, abs(exp)). */
2334 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2336 for (;; parts *= 2) {
2337 opStatus sigStatus, powStatus;
2338 unsigned int excessPrecision, truncatedBits;
2340 calcSemantics.precision = parts * integerPartWidth - 1;
2341 excessPrecision = calcSemantics.precision - semantics->precision;
2342 truncatedBits = excessPrecision;
2344 APFloat decSig(calcSemantics, fcZero, sign);
2345 APFloat pow5(calcSemantics, fcZero, false);
2347 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2348 rmNearestTiesToEven);
2349 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2350 rmNearestTiesToEven);
2351 /* Add exp, as 10^n = 5^n * 2^n. */
2352 decSig.exponent += exp;
2354 lostFraction calcLostFraction;
2355 integerPart HUerr, HUdistance;
2356 unsigned int powHUerr;
2359 /* multiplySignificand leaves the precision-th bit set to 1. */
2360 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2361 powHUerr = powStatus != opOK;
2363 calcLostFraction = decSig.divideSignificand(pow5);
2364 /* Denormal numbers have less precision. */
2365 if (decSig.exponent < semantics->minExponent) {
2366 excessPrecision += (semantics->minExponent - decSig.exponent);
2367 truncatedBits = excessPrecision;
2368 if (excessPrecision > calcSemantics.precision)
2369 excessPrecision = calcSemantics.precision;
2371 /* Extra half-ulp lost in reciprocal of exponent. */
2372 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2375 /* Both multiplySignificand and divideSignificand return the
2376 result with the integer bit set. */
2377 assert(APInt::tcExtractBit
2378 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2380 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2382 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2383 excessPrecision, isNearest);
2385 /* Are we guaranteed to round correctly if we truncate? */
2386 if (HUdistance >= HUerr) {
2387 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2388 calcSemantics.precision - excessPrecision,
2390 /* Take the exponent of decSig. If we tcExtract-ed less bits
2391 above we must adjust our exponent to compensate for the
2392 implicit right shift. */
2393 exponent = (decSig.exponent + semantics->precision
2394 - (calcSemantics.precision - excessPrecision));
2395 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2398 return normalize(rounding_mode, calcLostFraction);
2404 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2409 /* Scan the text. */
2410 StringRef::iterator p = str.begin();
2411 interpretDecimal(p, str.end(), &D);
2413 /* Handle the quick cases. First the case of no significant digits,
2414 i.e. zero, and then exponents that are obviously too large or too
2415 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2416 definitely overflows if
2418 (exp - 1) * L >= maxExponent
2420 and definitely underflows to zero where
2422 (exp + 1) * L <= minExponent - precision
2424 With integer arithmetic the tightest bounds for L are
2426 93/28 < L < 196/59 [ numerator <= 256 ]
2427 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2430 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2434 /* Check whether the normalized exponent is high enough to overflow
2435 max during the log-rebasing in the max-exponent check below. */
2436 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2437 fs = handleOverflow(rounding_mode);
2439 /* If it wasn't, then it also wasn't high enough to overflow max
2440 during the log-rebasing in the min-exponent check. Check that it
2441 won't overflow min in either check, then perform the min-exponent
2443 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2444 (D.normalizedExponent + 1) * 28738 <=
2445 8651 * (semantics->minExponent - (int) semantics->precision)) {
2446 /* Underflow to zero and round. */
2448 fs = normalize(rounding_mode, lfLessThanHalf);
2450 /* We can finally safely perform the max-exponent check. */
2451 } else if ((D.normalizedExponent - 1) * 42039
2452 >= 12655 * semantics->maxExponent) {
2453 /* Overflow and round. */
2454 fs = handleOverflow(rounding_mode);
2456 integerPart *decSignificand;
2457 unsigned int partCount;
2459 /* A tight upper bound on number of bits required to hold an
2460 N-digit decimal integer is N * 196 / 59. Allocate enough space
2461 to hold the full significand, and an extra part required by
2463 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2464 partCount = partCountForBits(1 + 196 * partCount / 59);
2465 decSignificand = new integerPart[partCount + 1];
2468 /* Convert to binary efficiently - we do almost all multiplication
2469 in an integerPart. When this would overflow do we do a single
2470 bignum multiplication, and then revert again to multiplication
2471 in an integerPart. */
2473 integerPart decValue, val, multiplier;
2481 if (p == str.end()) {
2485 decValue = decDigitValue(*p++);
2486 assert(decValue < 10U && "Invalid character in significand");
2488 val = val * 10 + decValue;
2489 /* The maximum number that can be multiplied by ten with any
2490 digit added without overflowing an integerPart. */
2491 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2493 /* Multiply out the current part. */
2494 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2495 partCount, partCount + 1, false);
2497 /* If we used another part (likely but not guaranteed), increase
2499 if (decSignificand[partCount])
2501 } while (p <= D.lastSigDigit);
2503 category = fcNormal;
2504 fs = roundSignificandWithExponent(decSignificand, partCount,
2505 D.exponent, rounding_mode);
2507 delete [] decSignificand;
2514 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2516 assert(!str.empty() && "Invalid string length");
2518 /* Handle a leading minus sign. */
2519 StringRef::iterator p = str.begin();
2520 size_t slen = str.size();
2521 sign = *p == '-' ? 1 : 0;
2522 if (*p == '-' || *p == '+') {
2525 assert(slen && "String has no digits");
2528 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2529 assert(slen - 2 && "Invalid string");
2530 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2534 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2537 /* Write out a hexadecimal representation of the floating point value
2538 to DST, which must be of sufficient size, in the C99 form
2539 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2540 excluding the terminating NUL.
2542 If UPPERCASE, the output is in upper case, otherwise in lower case.
2544 HEXDIGITS digits appear altogether, rounding the value if
2545 necessary. If HEXDIGITS is 0, the minimal precision to display the
2546 number precisely is used instead. If nothing would appear after
2547 the decimal point it is suppressed.
2549 The decimal exponent is always printed and has at least one digit.
2550 Zero values display an exponent of zero. Infinities and NaNs
2551 appear as "infinity" or "nan" respectively.
2553 The above rules are as specified by C99. There is ambiguity about
2554 what the leading hexadecimal digit should be. This implementation
2555 uses whatever is necessary so that the exponent is displayed as
2556 stored. This implies the exponent will fall within the IEEE format
2557 range, and the leading hexadecimal digit will be 0 (for denormals),
2558 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2559 any other digits zero).
2562 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2563 bool upperCase, roundingMode rounding_mode) const
2573 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2574 dst += sizeof infinityL - 1;
2578 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2579 dst += sizeof NaNU - 1;
2584 *dst++ = upperCase ? 'X': 'x';
2586 if (hexDigits > 1) {
2588 memset (dst, '0', hexDigits - 1);
2589 dst += hexDigits - 1;
2591 *dst++ = upperCase ? 'P': 'p';
2596 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2602 return static_cast<unsigned int>(dst - p);
2605 /* Does the hard work of outputting the correctly rounded hexadecimal
2606 form of a normal floating point number with the specified number of
2607 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2608 digits necessary to print the value precisely is output. */
2610 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2612 roundingMode rounding_mode) const
2614 unsigned int count, valueBits, shift, partsCount, outputDigits;
2615 const char *hexDigitChars;
2616 const integerPart *significand;
2621 *dst++ = upperCase ? 'X': 'x';
2624 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2626 significand = significandParts();
2627 partsCount = partCount();
2629 /* +3 because the first digit only uses the single integer bit, so
2630 we have 3 virtual zero most-significant-bits. */
2631 valueBits = semantics->precision + 3;
2632 shift = integerPartWidth - valueBits % integerPartWidth;
2634 /* The natural number of digits required ignoring trailing
2635 insignificant zeroes. */
2636 outputDigits = (valueBits - significandLSB () + 3) / 4;
2638 /* hexDigits of zero means use the required number for the
2639 precision. Otherwise, see if we are truncating. If we are,
2640 find out if we need to round away from zero. */
2642 if (hexDigits < outputDigits) {
2643 /* We are dropping non-zero bits, so need to check how to round.
2644 "bits" is the number of dropped bits. */
2646 lostFraction fraction;
2648 bits = valueBits - hexDigits * 4;
2649 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2650 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2652 outputDigits = hexDigits;
2655 /* Write the digits consecutively, and start writing in the location
2656 of the hexadecimal point. We move the most significant digit
2657 left and add the hexadecimal point later. */
2660 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2662 while (outputDigits && count) {
2665 /* Put the most significant integerPartWidth bits in "part". */
2666 if (--count == partsCount)
2667 part = 0; /* An imaginary higher zero part. */
2669 part = significand[count] << shift;
2672 part |= significand[count - 1] >> (integerPartWidth - shift);
2674 /* Convert as much of "part" to hexdigits as we can. */
2675 unsigned int curDigits = integerPartWidth / 4;
2677 if (curDigits > outputDigits)
2678 curDigits = outputDigits;
2679 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2680 outputDigits -= curDigits;
2686 /* Note that hexDigitChars has a trailing '0'. */
2689 *q = hexDigitChars[hexDigitValue (*q) + 1];
2690 } while (*q == '0');
2693 /* Add trailing zeroes. */
2694 memset (dst, '0', outputDigits);
2695 dst += outputDigits;
2698 /* Move the most significant digit to before the point, and if there
2699 is something after the decimal point add it. This must come
2700 after rounding above. */
2707 /* Finally output the exponent. */
2708 *dst++ = upperCase ? 'P': 'p';
2710 return writeSignedDecimal (dst, exponent);
2713 hash_code llvm::hash_value(const APFloat &Arg) {
2714 if (Arg.category != APFloat::fcNormal)
2715 return hash_combine((uint8_t)Arg.category,
2716 // NaN has no sign, fix it at zero.
2717 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2718 Arg.semantics->precision);
2720 // Normal floats need their exponent and significand hashed.
2721 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2722 Arg.semantics->precision, Arg.exponent,
2724 Arg.significandParts(),
2725 Arg.significandParts() + Arg.partCount()));
2728 // Conversion from APFloat to/from host float/double. It may eventually be
2729 // possible to eliminate these and have everybody deal with APFloats, but that
2730 // will take a while. This approach will not easily extend to long double.
2731 // Current implementation requires integerPartWidth==64, which is correct at
2732 // the moment but could be made more general.
2734 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2735 // the actual IEEE respresentations. We compensate for that here.
2738 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2740 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2741 assert(partCount()==2);
2743 uint64_t myexponent, mysignificand;
2745 if (category==fcNormal) {
2746 myexponent = exponent+16383; //bias
2747 mysignificand = significandParts()[0];
2748 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2749 myexponent = 0; // denormal
2750 } else if (category==fcZero) {
2753 } else if (category==fcInfinity) {
2754 myexponent = 0x7fff;
2755 mysignificand = 0x8000000000000000ULL;
2757 assert(category == fcNaN && "Unknown category");
2758 myexponent = 0x7fff;
2759 mysignificand = significandParts()[0];
2763 words[0] = mysignificand;
2764 words[1] = ((uint64_t)(sign & 1) << 15) |
2765 (myexponent & 0x7fffLL);
2766 return APInt(80, words);
2770 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2772 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2773 assert(partCount()==2);
2779 // Convert number to double. To avoid spurious underflows, we re-
2780 // normalize against the "double" minExponent first, and only *then*
2781 // truncate the mantissa. The result of that second conversion
2782 // may be inexact, but should never underflow.
2783 // Declare fltSemantics before APFloat that uses it (and
2784 // saves pointer to it) to ensure correct destruction order.
2785 fltSemantics extendedSemantics = *semantics;
2786 extendedSemantics.minExponent = IEEEdouble.minExponent;
2787 APFloat extended(*this);
2788 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2789 assert(fs == opOK && !losesInfo);
2792 APFloat u(extended);
2793 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2794 assert(fs == opOK || fs == opInexact);
2796 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2798 // If conversion was exact or resulted in a special case, we're done;
2799 // just set the second double to zero. Otherwise, re-convert back to
2800 // the extended format and compute the difference. This now should
2801 // convert exactly to double.
2802 if (u.category == fcNormal && losesInfo) {
2803 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2804 assert(fs == opOK && !losesInfo);
2807 APFloat v(extended);
2808 v.subtract(u, rmNearestTiesToEven);
2809 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2810 assert(fs == opOK && !losesInfo);
2812 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2817 return APInt(128, words);
2821 APFloat::convertQuadrupleAPFloatToAPInt() const
2823 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2824 assert(partCount()==2);
2826 uint64_t myexponent, mysignificand, mysignificand2;
2828 if (category==fcNormal) {
2829 myexponent = exponent+16383; //bias
2830 mysignificand = significandParts()[0];
2831 mysignificand2 = significandParts()[1];
2832 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2833 myexponent = 0; // denormal
2834 } else if (category==fcZero) {
2836 mysignificand = mysignificand2 = 0;
2837 } else if (category==fcInfinity) {
2838 myexponent = 0x7fff;
2839 mysignificand = mysignificand2 = 0;
2841 assert(category == fcNaN && "Unknown category!");
2842 myexponent = 0x7fff;
2843 mysignificand = significandParts()[0];
2844 mysignificand2 = significandParts()[1];
2848 words[0] = mysignificand;
2849 words[1] = ((uint64_t)(sign & 1) << 63) |
2850 ((myexponent & 0x7fff) << 48) |
2851 (mysignificand2 & 0xffffffffffffLL);
2853 return APInt(128, words);
2857 APFloat::convertDoubleAPFloatToAPInt() const
2859 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2860 assert(partCount()==1);
2862 uint64_t myexponent, mysignificand;
2864 if (category==fcNormal) {
2865 myexponent = exponent+1023; //bias
2866 mysignificand = *significandParts();
2867 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2868 myexponent = 0; // denormal
2869 } else if (category==fcZero) {
2872 } else if (category==fcInfinity) {
2876 assert(category == fcNaN && "Unknown category!");
2878 mysignificand = *significandParts();
2881 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2882 ((myexponent & 0x7ff) << 52) |
2883 (mysignificand & 0xfffffffffffffLL))));
2887 APFloat::convertFloatAPFloatToAPInt() const
2889 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2890 assert(partCount()==1);
2892 uint32_t myexponent, mysignificand;
2894 if (category==fcNormal) {
2895 myexponent = exponent+127; //bias
2896 mysignificand = (uint32_t)*significandParts();
2897 if (myexponent == 1 && !(mysignificand & 0x800000))
2898 myexponent = 0; // denormal
2899 } else if (category==fcZero) {
2902 } else if (category==fcInfinity) {
2906 assert(category == fcNaN && "Unknown category!");
2908 mysignificand = (uint32_t)*significandParts();
2911 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2912 (mysignificand & 0x7fffff)));
2916 APFloat::convertHalfAPFloatToAPInt() const
2918 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2919 assert(partCount()==1);
2921 uint32_t myexponent, mysignificand;
2923 if (category==fcNormal) {
2924 myexponent = exponent+15; //bias
2925 mysignificand = (uint32_t)*significandParts();
2926 if (myexponent == 1 && !(mysignificand & 0x400))
2927 myexponent = 0; // denormal
2928 } else if (category==fcZero) {
2931 } else if (category==fcInfinity) {
2935 assert(category == fcNaN && "Unknown category!");
2937 mysignificand = (uint32_t)*significandParts();
2940 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2941 (mysignificand & 0x3ff)));
2944 // This function creates an APInt that is just a bit map of the floating
2945 // point constant as it would appear in memory. It is not a conversion,
2946 // and treating the result as a normal integer is unlikely to be useful.
2949 APFloat::bitcastToAPInt() const
2951 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2952 return convertHalfAPFloatToAPInt();
2954 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2955 return convertFloatAPFloatToAPInt();
2957 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2958 return convertDoubleAPFloatToAPInt();
2960 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2961 return convertQuadrupleAPFloatToAPInt();
2963 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2964 return convertPPCDoubleDoubleAPFloatToAPInt();
2966 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2968 return convertF80LongDoubleAPFloatToAPInt();
2972 APFloat::convertToFloat() const
2974 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2975 "Float semantics are not IEEEsingle");
2976 APInt api = bitcastToAPInt();
2977 return api.bitsToFloat();
2981 APFloat::convertToDouble() const
2983 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2984 "Float semantics are not IEEEdouble");
2985 APInt api = bitcastToAPInt();
2986 return api.bitsToDouble();
2989 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2990 /// does not support these bit patterns:
2991 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2992 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2993 /// exponent = 0, integer bit 1 ("pseudodenormal")
2994 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2995 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2997 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2999 assert(api.getBitWidth()==80);
3000 uint64_t i1 = api.getRawData()[0];
3001 uint64_t i2 = api.getRawData()[1];
3002 uint64_t myexponent = (i2 & 0x7fff);
3003 uint64_t mysignificand = i1;
3005 initialize(&APFloat::x87DoubleExtended);
3006 assert(partCount()==2);
3008 sign = static_cast<unsigned int>(i2>>15);
3009 if (myexponent==0 && mysignificand==0) {
3010 // exponent, significand meaningless
3012 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3013 // exponent, significand meaningless
3014 category = fcInfinity;
3015 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3016 // exponent meaningless
3018 significandParts()[0] = mysignificand;
3019 significandParts()[1] = 0;
3021 category = fcNormal;
3022 exponent = myexponent - 16383;
3023 significandParts()[0] = mysignificand;
3024 significandParts()[1] = 0;
3025 if (myexponent==0) // denormal
3031 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3033 assert(api.getBitWidth()==128);
3034 uint64_t i1 = api.getRawData()[0];
3035 uint64_t i2 = api.getRawData()[1];
3039 // Get the first double and convert to our format.
3040 initFromDoubleAPInt(APInt(64, i1));
3041 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3042 assert(fs == opOK && !losesInfo);
3045 // Unless we have a special case, add in second double.
3046 if (category == fcNormal) {
3047 APFloat v(IEEEdouble, APInt(64, i2));
3048 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3049 assert(fs == opOK && !losesInfo);
3052 add(v, rmNearestTiesToEven);
3057 APFloat::initFromQuadrupleAPInt(const APInt &api)
3059 assert(api.getBitWidth()==128);
3060 uint64_t i1 = api.getRawData()[0];
3061 uint64_t i2 = api.getRawData()[1];
3062 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3063 uint64_t mysignificand = i1;
3064 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3066 initialize(&APFloat::IEEEquad);
3067 assert(partCount()==2);
3069 sign = static_cast<unsigned int>(i2>>63);
3070 if (myexponent==0 &&
3071 (mysignificand==0 && mysignificand2==0)) {
3072 // exponent, significand meaningless
3074 } else if (myexponent==0x7fff &&
3075 (mysignificand==0 && mysignificand2==0)) {
3076 // exponent, significand meaningless
3077 category = fcInfinity;
3078 } else if (myexponent==0x7fff &&
3079 (mysignificand!=0 || mysignificand2 !=0)) {
3080 // exponent meaningless
3082 significandParts()[0] = mysignificand;
3083 significandParts()[1] = mysignificand2;
3085 category = fcNormal;
3086 exponent = myexponent - 16383;
3087 significandParts()[0] = mysignificand;
3088 significandParts()[1] = mysignificand2;
3089 if (myexponent==0) // denormal
3092 significandParts()[1] |= 0x1000000000000LL; // integer bit
3097 APFloat::initFromDoubleAPInt(const APInt &api)
3099 assert(api.getBitWidth()==64);
3100 uint64_t i = *api.getRawData();
3101 uint64_t myexponent = (i >> 52) & 0x7ff;
3102 uint64_t mysignificand = i & 0xfffffffffffffLL;
3104 initialize(&APFloat::IEEEdouble);
3105 assert(partCount()==1);
3107 sign = static_cast<unsigned int>(i>>63);
3108 if (myexponent==0 && mysignificand==0) {
3109 // exponent, significand meaningless
3111 } else if (myexponent==0x7ff && mysignificand==0) {
3112 // exponent, significand meaningless
3113 category = fcInfinity;
3114 } else if (myexponent==0x7ff && mysignificand!=0) {
3115 // exponent meaningless
3117 *significandParts() = mysignificand;
3119 category = fcNormal;
3120 exponent = myexponent - 1023;
3121 *significandParts() = mysignificand;
3122 if (myexponent==0) // denormal
3125 *significandParts() |= 0x10000000000000LL; // integer bit
3130 APFloat::initFromFloatAPInt(const APInt & api)
3132 assert(api.getBitWidth()==32);
3133 uint32_t i = (uint32_t)*api.getRawData();
3134 uint32_t myexponent = (i >> 23) & 0xff;
3135 uint32_t mysignificand = i & 0x7fffff;
3137 initialize(&APFloat::IEEEsingle);
3138 assert(partCount()==1);
3141 if (myexponent==0 && mysignificand==0) {
3142 // exponent, significand meaningless
3144 } else if (myexponent==0xff && mysignificand==0) {
3145 // exponent, significand meaningless
3146 category = fcInfinity;
3147 } else if (myexponent==0xff && mysignificand!=0) {
3148 // sign, exponent, significand meaningless
3150 *significandParts() = mysignificand;
3152 category = fcNormal;
3153 exponent = myexponent - 127; //bias
3154 *significandParts() = mysignificand;
3155 if (myexponent==0) // denormal
3158 *significandParts() |= 0x800000; // integer bit
3163 APFloat::initFromHalfAPInt(const APInt & api)
3165 assert(api.getBitWidth()==16);
3166 uint32_t i = (uint32_t)*api.getRawData();
3167 uint32_t myexponent = (i >> 10) & 0x1f;
3168 uint32_t mysignificand = i & 0x3ff;
3170 initialize(&APFloat::IEEEhalf);
3171 assert(partCount()==1);
3174 if (myexponent==0 && mysignificand==0) {
3175 // exponent, significand meaningless
3177 } else if (myexponent==0x1f && mysignificand==0) {
3178 // exponent, significand meaningless
3179 category = fcInfinity;
3180 } else if (myexponent==0x1f && mysignificand!=0) {
3181 // sign, exponent, significand meaningless
3183 *significandParts() = mysignificand;
3185 category = fcNormal;
3186 exponent = myexponent - 15; //bias
3187 *significandParts() = mysignificand;
3188 if (myexponent==0) // denormal
3191 *significandParts() |= 0x400; // integer bit
3195 /// Treat api as containing the bits of a floating point number. Currently
3196 /// we infer the floating point type from the size of the APInt. The
3197 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3198 /// when the size is anything else).
3200 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3202 if (Sem == &IEEEhalf)
3203 return initFromHalfAPInt(api);
3204 if (Sem == &IEEEsingle)
3205 return initFromFloatAPInt(api);
3206 if (Sem == &IEEEdouble)
3207 return initFromDoubleAPInt(api);
3208 if (Sem == &x87DoubleExtended)
3209 return initFromF80LongDoubleAPInt(api);
3210 if (Sem == &IEEEquad)
3211 return initFromQuadrupleAPInt(api);
3212 if (Sem == &PPCDoubleDouble)
3213 return initFromPPCDoubleDoubleAPInt(api);
3215 llvm_unreachable(0);
3219 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3223 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3225 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3227 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3229 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3232 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3233 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3235 llvm_unreachable("Unknown floating bit width");
3239 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3240 APFloat Val(Sem, fcNormal, Negative);
3242 // We want (in interchange format):
3243 // sign = {Negative}
3245 // significand = 1..1
3247 Val.exponent = Sem.maxExponent; // unbiased
3249 // 1-initialize all bits....
3250 Val.zeroSignificand();
3251 integerPart *significand = Val.significandParts();
3252 unsigned N = partCountForBits(Sem.precision);
3253 for (unsigned i = 0; i != N; ++i)
3254 significand[i] = ~((integerPart) 0);
3256 // ...and then clear the top bits for internal consistency.
3257 if (Sem.precision % integerPartWidth != 0)
3259 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3264 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3265 APFloat Val(Sem, fcNormal, Negative);
3267 // We want (in interchange format):
3268 // sign = {Negative}
3270 // significand = 0..01
3272 Val.exponent = Sem.minExponent; // unbiased
3273 Val.zeroSignificand();
3274 Val.significandParts()[0] = 1;
3278 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3279 APFloat Val(Sem, fcNormal, Negative);
3281 // We want (in interchange format):
3282 // sign = {Negative}
3284 // significand = 10..0
3286 Val.exponent = Sem.minExponent;
3287 Val.zeroSignificand();
3288 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3289 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3294 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3295 initFromAPInt(&Sem, API);
3298 APFloat::APFloat(float f) {
3299 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3302 APFloat::APFloat(double d) {
3303 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3307 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3308 Buffer.append(Str.begin(), Str.end());
3311 /// Removes data from the given significand until it is no more
3312 /// precise than is required for the desired precision.
3313 void AdjustToPrecision(APInt &significand,
3314 int &exp, unsigned FormatPrecision) {
3315 unsigned bits = significand.getActiveBits();
3317 // 196/59 is a very slight overestimate of lg_2(10).
3318 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3320 if (bits <= bitsRequired) return;
3322 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3323 if (!tensRemovable) return;
3325 exp += tensRemovable;
3327 APInt divisor(significand.getBitWidth(), 1);
3328 APInt powten(significand.getBitWidth(), 10);
3330 if (tensRemovable & 1)
3332 tensRemovable >>= 1;
3333 if (!tensRemovable) break;
3337 significand = significand.udiv(divisor);
3339 // Truncate the significand down to its active bit count.
3340 significand = significand.trunc(significand.getActiveBits());
3344 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3345 int &exp, unsigned FormatPrecision) {
3346 unsigned N = buffer.size();
3347 if (N <= FormatPrecision) return;
3349 // The most significant figures are the last ones in the buffer.
3350 unsigned FirstSignificant = N - FormatPrecision;
3353 // FIXME: this probably shouldn't use 'round half up'.
3355 // Rounding down is just a truncation, except we also want to drop
3356 // trailing zeros from the new result.
3357 if (buffer[FirstSignificant - 1] < '5') {
3358 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3361 exp += FirstSignificant;
3362 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3366 // Rounding up requires a decimal add-with-carry. If we continue
3367 // the carry, the newly-introduced zeros will just be truncated.
3368 for (unsigned I = FirstSignificant; I != N; ++I) {
3369 if (buffer[I] == '9') {
3377 // If we carried through, we have exactly one digit of precision.
3378 if (FirstSignificant == N) {
3379 exp += FirstSignificant;
3381 buffer.push_back('1');
3385 exp += FirstSignificant;
3386 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3390 void APFloat::toString(SmallVectorImpl<char> &Str,
3391 unsigned FormatPrecision,
3392 unsigned FormatMaxPadding) const {
3396 return append(Str, "-Inf");
3398 return append(Str, "+Inf");
3400 case fcNaN: return append(Str, "NaN");
3406 if (!FormatMaxPadding)
3407 append(Str, "0.0E+0");
3419 // Decompose the number into an APInt and an exponent.
3420 int exp = exponent - ((int) semantics->precision - 1);
3421 APInt significand(semantics->precision,
3422 makeArrayRef(significandParts(),
3423 partCountForBits(semantics->precision)));
3425 // Set FormatPrecision if zero. We want to do this before we
3426 // truncate trailing zeros, as those are part of the precision.
3427 if (!FormatPrecision) {
3428 // It's an interesting question whether to use the nominal
3429 // precision or the active precision here for denormals.
3431 // FormatPrecision = ceil(significandBits / lg_2(10))
3432 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3435 // Ignore trailing binary zeros.
3436 int trailingZeros = significand.countTrailingZeros();
3437 exp += trailingZeros;
3438 significand = significand.lshr(trailingZeros);
3440 // Change the exponent from 2^e to 10^e.
3443 } else if (exp > 0) {
3445 significand = significand.zext(semantics->precision + exp);
3446 significand <<= exp;
3448 } else { /* exp < 0 */
3451 // We transform this using the identity:
3452 // (N)(2^-e) == (N)(5^e)(10^-e)
3453 // This means we have to multiply N (the significand) by 5^e.
3454 // To avoid overflow, we have to operate on numbers large
3455 // enough to store N * 5^e:
3456 // log2(N * 5^e) == log2(N) + e * log2(5)
3457 // <= semantics->precision + e * 137 / 59
3458 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3460 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3462 // Multiply significand by 5^e.
3463 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3464 significand = significand.zext(precision);
3465 APInt five_to_the_i(precision, 5);
3467 if (texp & 1) significand *= five_to_the_i;
3471 five_to_the_i *= five_to_the_i;
3475 AdjustToPrecision(significand, exp, FormatPrecision);
3477 SmallVector<char, 256> buffer;
3480 unsigned precision = significand.getBitWidth();
3481 APInt ten(precision, 10);
3482 APInt digit(precision, 0);
3484 bool inTrail = true;
3485 while (significand != 0) {
3486 // digit <- significand % 10
3487 // significand <- significand / 10
3488 APInt::udivrem(significand, ten, significand, digit);
3490 unsigned d = digit.getZExtValue();
3492 // Drop trailing zeros.
3493 if (inTrail && !d) exp++;
3495 buffer.push_back((char) ('0' + d));
3500 assert(!buffer.empty() && "no characters in buffer!");
3502 // Drop down to FormatPrecision.
3503 // TODO: don't do more precise calculations above than are required.
3504 AdjustToPrecision(buffer, exp, FormatPrecision);
3506 unsigned NDigits = buffer.size();
3508 // Check whether we should use scientific notation.
3509 bool FormatScientific;
3510 if (!FormatMaxPadding)
3511 FormatScientific = true;
3516 // But we shouldn't make the number look more precise than it is.
3517 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3518 NDigits + (unsigned) exp > FormatPrecision);
3520 // Power of the most significant digit.
3521 int MSD = exp + (int) (NDigits - 1);
3524 FormatScientific = false;
3526 // 765e-5 == 0.00765
3528 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3533 // Scientific formatting is pretty straightforward.
3534 if (FormatScientific) {
3535 exp += (NDigits - 1);
3537 Str.push_back(buffer[NDigits-1]);
3542 for (unsigned I = 1; I != NDigits; ++I)
3543 Str.push_back(buffer[NDigits-1-I]);
3546 Str.push_back(exp >= 0 ? '+' : '-');
3547 if (exp < 0) exp = -exp;
3548 SmallVector<char, 6> expbuf;
3550 expbuf.push_back((char) ('0' + (exp % 10)));
3553 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3554 Str.push_back(expbuf[E-1-I]);
3558 // Non-scientific, positive exponents.
3560 for (unsigned I = 0; I != NDigits; ++I)
3561 Str.push_back(buffer[NDigits-1-I]);
3562 for (unsigned I = 0; I != (unsigned) exp; ++I)
3567 // Non-scientific, negative exponents.
3569 // The number of digits to the left of the decimal point.
3570 int NWholeDigits = exp + (int) NDigits;
3573 if (NWholeDigits > 0) {
3574 for (; I != (unsigned) NWholeDigits; ++I)
3575 Str.push_back(buffer[NDigits-I-1]);
3578 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3582 for (unsigned Z = 1; Z != NZeros; ++Z)
3586 for (; I != NDigits; ++I)
3587 Str.push_back(buffer[NDigits-I-1]);
3590 bool APFloat::getExactInverse(APFloat *inv) const {
3591 // Special floats and denormals have no exact inverse.
3592 if (category != fcNormal)
3595 // Check that the number is a power of two by making sure that only the
3596 // integer bit is set in the significand.
3597 if (significandLSB() != semantics->precision - 1)
3601 APFloat reciprocal(*semantics, 1ULL);
3602 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3605 // Avoid multiplication with a denormal, it is not safe on all platforms and
3606 // may be slower than a normal division.
3607 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3610 assert(reciprocal.category == fcNormal &&
3611 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);