1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
18 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
30 /* Represents floating point arithmetic semantics. */
32 /* The largest E such that 2^E is representable; this matches the
33 definition of IEEE 754. */
34 exponent_t maxExponent;
36 /* The smallest E such that 2^E is a normalized number; this
37 matches the definition of IEEE 754. */
38 exponent_t minExponent;
40 /* Number of bits in the significand. This includes the integer
42 unsigned int precision;
44 /* True if arithmetic is supported. */
45 unsigned int arithmeticOK;
48 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
49 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
50 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
51 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
52 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
54 // The PowerPC format consists of two doubles. It does not map cleanly
55 // onto the usual format above. For now only storage of constants of
56 // this type is supported, no arithmetic.
57 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
59 /* A tight upper bound on number of parts required to hold the value
62 power * 815 / (351 * integerPartWidth) + 1
64 However, whilst the result may require only this many parts,
65 because we are multiplying two values to get it, the
66 multiplication may require an extra part with the excess part
67 being zero (consider the trivial case of 1 * 1, tcFullMultiply
68 requires two parts to hold the single-part result). So we add an
69 extra one to guarantee enough space whilst multiplying. */
70 const unsigned int maxExponent = 16383;
71 const unsigned int maxPrecision = 113;
72 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
73 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
74 / (351 * integerPartWidth));
77 /* Put a bunch of private, handy routines in an anonymous namespace. */
81 partCountForBits(unsigned int bits)
83 return ((bits) + integerPartWidth - 1) / integerPartWidth;
86 /* Returns 0U-9U. Return values >= 10U are not digits. */
88 decDigitValue(unsigned int c)
94 hexDigitValue(unsigned int c)
114 assertArithmeticOK(const llvm::fltSemantics &semantics) {
115 assert(semantics.arithmeticOK
116 && "Compile-time arithmetic does not support these semantics");
119 /* Return the value of a decimal exponent of the form
122 If the exponent overflows, returns a large exponent with the
125 readExponent(const char *p)
128 unsigned int absExponent;
129 const unsigned int overlargeExponent = 24000; /* FIXME. */
131 isNegative = (*p == '-');
132 if (*p == '-' || *p == '+')
135 absExponent = decDigitValue(*p++);
136 assert (absExponent < 10U);
141 value = decDigitValue(*p);
146 value += absExponent * 10;
147 if (absExponent >= overlargeExponent) {
148 absExponent = overlargeExponent;
155 return -(int) absExponent;
157 return (int) absExponent;
160 /* This is ugly and needs cleaning up, but I don't immediately see
161 how whilst remaining safe. */
163 totalExponent(const char *p, int exponentAdjustment)
165 integerPart unsignedExponent;
166 bool negative, overflow;
169 /* Move past the exponent letter and sign to the digits. */
171 negative = *p == '-';
172 if(*p == '-' || *p == '+')
175 unsignedExponent = 0;
180 value = decDigitValue(*p);
185 unsignedExponent = unsignedExponent * 10 + value;
186 if(unsignedExponent > 65535)
190 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
194 exponent = unsignedExponent;
196 exponent = -exponent;
197 exponent += exponentAdjustment;
198 if(exponent > 65535 || exponent < -65536)
203 exponent = negative ? -65536: 65535;
209 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
234 If the value is zero, V->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
238 const char *firstSigDigit;
239 const char *lastSigDigit;
241 int normalizedExponent;
245 interpretDecimal(const char *p, decimalInfo *D)
249 p = skipLeadingZeroesAndAnyDot (p, &dot);
251 D->firstSigDigit = p;
253 D->normalizedExponent = 0;
260 if (decDigitValue(*p) >= 10U)
265 /* If number is all zerooes accept any exponent. */
266 if (p != D->firstSigDigit) {
267 if (*p == 'e' || *p == 'E')
268 D->exponent = readExponent(p + 1);
270 /* Implied decimal point? */
274 /* Drop insignificant trailing zeroes. */
281 /* Adjust the exponents for any decimal point. */
282 D->exponent += (dot - p) - (dot > p);
283 D->normalizedExponent = (D->exponent + (p - D->firstSigDigit)
284 - (dot > D->firstSigDigit && dot < p));
290 /* Return the trailing fraction of a hexadecimal number.
291 DIGITVALUE is the first hex digit of the fraction, P points to
294 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
296 unsigned int hexDigit;
298 /* If the first trailing digit isn't 0 or 8 we can work out the
299 fraction immediately. */
301 return lfMoreThanHalf;
302 else if(digitValue < 8 && digitValue > 0)
303 return lfLessThanHalf;
305 /* Otherwise we need to find the first non-zero digit. */
309 hexDigit = hexDigitValue(*p);
311 /* If we ran off the end it is exactly zero or one-half, otherwise
314 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
316 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
319 /* Return the fraction lost were a bignum truncated losing the least
320 significant BITS bits. */
322 lostFractionThroughTruncation(const integerPart *parts,
323 unsigned int partCount,
328 lsb = APInt::tcLSB(parts, partCount);
330 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
332 return lfExactlyZero;
334 return lfExactlyHalf;
335 if(bits <= partCount * integerPartWidth
336 && APInt::tcExtractBit(parts, bits - 1))
337 return lfMoreThanHalf;
339 return lfLessThanHalf;
342 /* Shift DST right BITS bits noting lost fraction. */
344 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
346 lostFraction lost_fraction;
348 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
350 APInt::tcShiftRight(dst, parts, bits);
352 return lost_fraction;
355 /* Combine the effect of two lost fractions. */
357 combineLostFractions(lostFraction moreSignificant,
358 lostFraction lessSignificant)
360 if(lessSignificant != lfExactlyZero) {
361 if(moreSignificant == lfExactlyZero)
362 moreSignificant = lfLessThanHalf;
363 else if(moreSignificant == lfExactlyHalf)
364 moreSignificant = lfMoreThanHalf;
367 return moreSignificant;
370 /* The error from the true value, in half-ulps, on multiplying two
371 floating point numbers, which differ from the value they
372 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
373 than the returned value.
375 See "How to Read Floating Point Numbers Accurately" by William D
378 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
380 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
382 if (HUerr1 + HUerr2 == 0)
383 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
385 return inexactMultiply + 2 * (HUerr1 + HUerr2);
388 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
389 when the least significant BITS are truncated. BITS cannot be
392 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
394 unsigned int count, partBits;
395 integerPart part, boundary;
400 count = bits / integerPartWidth;
401 partBits = bits % integerPartWidth + 1;
403 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
406 boundary = (integerPart) 1 << (partBits - 1);
411 if (part - boundary <= boundary - part)
412 return part - boundary;
414 return boundary - part;
417 if (part == boundary) {
420 return ~(integerPart) 0; /* A lot. */
423 } else if (part == boundary - 1) {
426 return ~(integerPart) 0; /* A lot. */
431 return ~(integerPart) 0; /* A lot. */
434 /* Place pow(5, power) in DST, and return the number of parts used.
435 DST must be at least one part larger than size of the answer. */
437 powerOf5(integerPart *dst, unsigned int power)
439 static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
441 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
442 static unsigned int partsCount[16] = { 1 };
444 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
447 assert(power <= maxExponent);
452 *p1 = firstEightPowers[power & 7];
458 for (unsigned int n = 0; power; power >>= 1, n++) {
463 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
465 pc = partsCount[n - 1];
466 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
468 if (pow5[pc - 1] == 0)
476 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
478 if (p2[result - 1] == 0)
481 /* Now result is in p1 with partsCount parts and p2 is scratch
483 tmp = p1, p1 = p2, p2 = tmp;
490 APInt::tcAssign(dst, p1, result);
495 /* Zero at the end to avoid modular arithmetic when adding one; used
496 when rounding up during hexadecimal output. */
497 static const char hexDigitsLower[] = "0123456789abcdef0";
498 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
499 static const char infinityL[] = "infinity";
500 static const char infinityU[] = "INFINITY";
501 static const char NaNL[] = "nan";
502 static const char NaNU[] = "NAN";
504 /* Write out an integerPart in hexadecimal, starting with the most
505 significant nibble. Write out exactly COUNT hexdigits, return
508 partAsHex (char *dst, integerPart part, unsigned int count,
509 const char *hexDigitChars)
511 unsigned int result = count;
513 assert (count != 0 && count <= integerPartWidth / 4);
515 part >>= (integerPartWidth - 4 * count);
517 dst[count] = hexDigitChars[part & 0xf];
524 /* Write out an unsigned decimal integer. */
526 writeUnsignedDecimal (char *dst, unsigned int n)
542 /* Write out a signed decimal integer. */
544 writeSignedDecimal (char *dst, int value)
548 dst = writeUnsignedDecimal(dst, -(unsigned) value);
550 dst = writeUnsignedDecimal(dst, value);
558 APFloat::initialize(const fltSemantics *ourSemantics)
562 semantics = ourSemantics;
565 significand.parts = new integerPart[count];
569 APFloat::freeSignificand()
572 delete [] significand.parts;
576 APFloat::assign(const APFloat &rhs)
578 assert(semantics == rhs.semantics);
581 category = rhs.category;
582 exponent = rhs.exponent;
584 exponent2 = rhs.exponent2;
585 if(category == fcNormal || category == fcNaN)
586 copySignificand(rhs);
590 APFloat::copySignificand(const APFloat &rhs)
592 assert(category == fcNormal || category == fcNaN);
593 assert(rhs.partCount() >= partCount());
595 APInt::tcAssign(significandParts(), rhs.significandParts(),
599 /* Make this number a NaN, with an arbitrary but deterministic value
600 for the significand. */
602 APFloat::makeNaN(void)
605 APInt::tcSet(significandParts(), ~0U, partCount());
609 APFloat::operator=(const APFloat &rhs)
612 if(semantics != rhs.semantics) {
614 initialize(rhs.semantics);
623 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
626 if (semantics != rhs.semantics ||
627 category != rhs.category ||
630 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
633 if (category==fcZero || category==fcInfinity)
635 else if (category==fcNormal && exponent!=rhs.exponent)
637 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
638 exponent2!=rhs.exponent2)
642 const integerPart* p=significandParts();
643 const integerPart* q=rhs.significandParts();
644 for (; i>0; i--, p++, q++) {
652 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
654 assertArithmeticOK(ourSemantics);
655 initialize(&ourSemantics);
658 exponent = ourSemantics.precision - 1;
659 significandParts()[0] = value;
660 normalize(rmNearestTiesToEven, lfExactlyZero);
663 APFloat::APFloat(const fltSemantics &ourSemantics,
664 fltCategory ourCategory, bool negative)
666 assertArithmeticOK(ourSemantics);
667 initialize(&ourSemantics);
668 category = ourCategory;
670 if(category == fcNormal)
672 else if (ourCategory == fcNaN)
676 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
678 assertArithmeticOK(ourSemantics);
679 initialize(&ourSemantics);
680 convertFromString(text, rmNearestTiesToEven);
683 APFloat::APFloat(const APFloat &rhs)
685 initialize(rhs.semantics);
695 APFloat::partCount() const
697 return partCountForBits(semantics->precision + 1);
701 APFloat::semanticsPrecision(const fltSemantics &semantics)
703 return semantics.precision;
707 APFloat::significandParts() const
709 return const_cast<APFloat *>(this)->significandParts();
713 APFloat::significandParts()
715 assert(category == fcNormal || category == fcNaN);
718 return significand.parts;
720 return &significand.part;
724 APFloat::zeroSignificand()
727 APInt::tcSet(significandParts(), 0, partCount());
730 /* Increment an fcNormal floating point number's significand. */
732 APFloat::incrementSignificand()
736 carry = APInt::tcIncrement(significandParts(), partCount());
738 /* Our callers should never cause us to overflow. */
742 /* Add the significand of the RHS. Returns the carry flag. */
744 APFloat::addSignificand(const APFloat &rhs)
748 parts = significandParts();
750 assert(semantics == rhs.semantics);
751 assert(exponent == rhs.exponent);
753 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
756 /* Subtract the significand of the RHS with a borrow flag. Returns
759 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
763 parts = significandParts();
765 assert(semantics == rhs.semantics);
766 assert(exponent == rhs.exponent);
768 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
772 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
773 on to the full-precision result of the multiplication. Returns the
776 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
778 unsigned int omsb; // One, not zero, based MSB.
779 unsigned int partsCount, newPartsCount, precision;
780 integerPart *lhsSignificand;
781 integerPart scratch[4];
782 integerPart *fullSignificand;
783 lostFraction lost_fraction;
785 assert(semantics == rhs.semantics);
787 precision = semantics->precision;
788 newPartsCount = partCountForBits(precision * 2);
790 if(newPartsCount > 4)
791 fullSignificand = new integerPart[newPartsCount];
793 fullSignificand = scratch;
795 lhsSignificand = significandParts();
796 partsCount = partCount();
798 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
799 rhs.significandParts(), partsCount, partsCount);
801 lost_fraction = lfExactlyZero;
802 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
803 exponent += rhs.exponent;
806 Significand savedSignificand = significand;
807 const fltSemantics *savedSemantics = semantics;
808 fltSemantics extendedSemantics;
810 unsigned int extendedPrecision;
812 /* Normalize our MSB. */
813 extendedPrecision = precision + precision - 1;
814 if(omsb != extendedPrecision)
816 APInt::tcShiftLeft(fullSignificand, newPartsCount,
817 extendedPrecision - omsb);
818 exponent -= extendedPrecision - omsb;
821 /* Create new semantics. */
822 extendedSemantics = *semantics;
823 extendedSemantics.precision = extendedPrecision;
825 if(newPartsCount == 1)
826 significand.part = fullSignificand[0];
828 significand.parts = fullSignificand;
829 semantics = &extendedSemantics;
831 APFloat extendedAddend(*addend);
832 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
833 assert(status == opOK);
834 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
836 /* Restore our state. */
837 if(newPartsCount == 1)
838 fullSignificand[0] = significand.part;
839 significand = savedSignificand;
840 semantics = savedSemantics;
842 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
845 exponent -= (precision - 1);
847 if(omsb > precision) {
848 unsigned int bits, significantParts;
851 bits = omsb - precision;
852 significantParts = partCountForBits(omsb);
853 lf = shiftRight(fullSignificand, significantParts, bits);
854 lost_fraction = combineLostFractions(lf, lost_fraction);
858 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
860 if(newPartsCount > 4)
861 delete [] fullSignificand;
863 return lost_fraction;
866 /* Multiply the significands of LHS and RHS to DST. */
868 APFloat::divideSignificand(const APFloat &rhs)
870 unsigned int bit, i, partsCount;
871 const integerPart *rhsSignificand;
872 integerPart *lhsSignificand, *dividend, *divisor;
873 integerPart scratch[4];
874 lostFraction lost_fraction;
876 assert(semantics == rhs.semantics);
878 lhsSignificand = significandParts();
879 rhsSignificand = rhs.significandParts();
880 partsCount = partCount();
883 dividend = new integerPart[partsCount * 2];
887 divisor = dividend + partsCount;
889 /* Copy the dividend and divisor as they will be modified in-place. */
890 for(i = 0; i < partsCount; i++) {
891 dividend[i] = lhsSignificand[i];
892 divisor[i] = rhsSignificand[i];
893 lhsSignificand[i] = 0;
896 exponent -= rhs.exponent;
898 unsigned int precision = semantics->precision;
900 /* Normalize the divisor. */
901 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
904 APInt::tcShiftLeft(divisor, partsCount, bit);
907 /* Normalize the dividend. */
908 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
911 APInt::tcShiftLeft(dividend, partsCount, bit);
914 /* Ensure the dividend >= divisor initially for the loop below.
915 Incidentally, this means that the division loop below is
916 guaranteed to set the integer bit to one. */
917 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
919 APInt::tcShiftLeft(dividend, partsCount, 1);
920 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
924 for(bit = precision; bit; bit -= 1) {
925 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
926 APInt::tcSubtract(dividend, divisor, 0, partsCount);
927 APInt::tcSetBit(lhsSignificand, bit - 1);
930 APInt::tcShiftLeft(dividend, partsCount, 1);
933 /* Figure out the lost fraction. */
934 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
937 lost_fraction = lfMoreThanHalf;
939 lost_fraction = lfExactlyHalf;
940 else if(APInt::tcIsZero(dividend, partsCount))
941 lost_fraction = lfExactlyZero;
943 lost_fraction = lfLessThanHalf;
948 return lost_fraction;
952 APFloat::significandMSB() const
954 return APInt::tcMSB(significandParts(), partCount());
958 APFloat::significandLSB() const
960 return APInt::tcLSB(significandParts(), partCount());
963 /* Note that a zero result is NOT normalized to fcZero. */
965 APFloat::shiftSignificandRight(unsigned int bits)
967 /* Our exponent should not overflow. */
968 assert((exponent_t) (exponent + bits) >= exponent);
972 return shiftRight(significandParts(), partCount(), bits);
975 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
977 APFloat::shiftSignificandLeft(unsigned int bits)
979 assert(bits < semantics->precision);
982 unsigned int partsCount = partCount();
984 APInt::tcShiftLeft(significandParts(), partsCount, bits);
987 assert(!APInt::tcIsZero(significandParts(), partsCount));
992 APFloat::compareAbsoluteValue(const APFloat &rhs) const
996 assert(semantics == rhs.semantics);
997 assert(category == fcNormal);
998 assert(rhs.category == fcNormal);
1000 compare = exponent - rhs.exponent;
1002 /* If exponents are equal, do an unsigned bignum comparison of the
1005 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1009 return cmpGreaterThan;
1010 else if(compare < 0)
1016 /* Handle overflow. Sign is preserved. We either become infinity or
1017 the largest finite number. */
1019 APFloat::handleOverflow(roundingMode rounding_mode)
1022 if(rounding_mode == rmNearestTiesToEven
1023 || rounding_mode == rmNearestTiesToAway
1024 || (rounding_mode == rmTowardPositive && !sign)
1025 || (rounding_mode == rmTowardNegative && sign))
1027 category = fcInfinity;
1028 return (opStatus) (opOverflow | opInexact);
1031 /* Otherwise we become the largest finite number. */
1032 category = fcNormal;
1033 exponent = semantics->maxExponent;
1034 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1035 semantics->precision);
1040 /* Returns TRUE if, when truncating the current number, with BIT the
1041 new LSB, with the given lost fraction and rounding mode, the result
1042 would need to be rounded away from zero (i.e., by increasing the
1043 signficand). This routine must work for fcZero of both signs, and
1044 fcNormal numbers. */
1046 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1047 lostFraction lost_fraction,
1048 unsigned int bit) const
1050 /* NaNs and infinities should not have lost fractions. */
1051 assert(category == fcNormal || category == fcZero);
1053 /* Current callers never pass this so we don't handle it. */
1054 assert(lost_fraction != lfExactlyZero);
1056 switch(rounding_mode) {
1060 case rmNearestTiesToAway:
1061 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1063 case rmNearestTiesToEven:
1064 if(lost_fraction == lfMoreThanHalf)
1067 /* Our zeroes don't have a significand to test. */
1068 if(lost_fraction == lfExactlyHalf && category != fcZero)
1069 return APInt::tcExtractBit(significandParts(), bit);
1076 case rmTowardPositive:
1077 return sign == false;
1079 case rmTowardNegative:
1080 return sign == true;
1085 APFloat::normalize(roundingMode rounding_mode,
1086 lostFraction lost_fraction)
1088 unsigned int omsb; /* One, not zero, based MSB. */
1091 if(category != fcNormal)
1094 /* Before rounding normalize the exponent of fcNormal numbers. */
1095 omsb = significandMSB() + 1;
1098 /* OMSB is numbered from 1. We want to place it in the integer
1099 bit numbered PRECISON if possible, with a compensating change in
1101 exponentChange = omsb - semantics->precision;
1103 /* If the resulting exponent is too high, overflow according to
1104 the rounding mode. */
1105 if(exponent + exponentChange > semantics->maxExponent)
1106 return handleOverflow(rounding_mode);
1108 /* Subnormal numbers have exponent minExponent, and their MSB
1109 is forced based on that. */
1110 if(exponent + exponentChange < semantics->minExponent)
1111 exponentChange = semantics->minExponent - exponent;
1113 /* Shifting left is easy as we don't lose precision. */
1114 if(exponentChange < 0) {
1115 assert(lost_fraction == lfExactlyZero);
1117 shiftSignificandLeft(-exponentChange);
1122 if(exponentChange > 0) {
1125 /* Shift right and capture any new lost fraction. */
1126 lf = shiftSignificandRight(exponentChange);
1128 lost_fraction = combineLostFractions(lf, lost_fraction);
1130 /* Keep OMSB up-to-date. */
1131 if(omsb > (unsigned) exponentChange)
1132 omsb -= exponentChange;
1138 /* Now round the number according to rounding_mode given the lost
1141 /* As specified in IEEE 754, since we do not trap we do not report
1142 underflow for exact results. */
1143 if(lost_fraction == lfExactlyZero) {
1144 /* Canonicalize zeroes. */
1151 /* Increment the significand if we're rounding away from zero. */
1152 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1154 exponent = semantics->minExponent;
1156 incrementSignificand();
1157 omsb = significandMSB() + 1;
1159 /* Did the significand increment overflow? */
1160 if(omsb == (unsigned) semantics->precision + 1) {
1161 /* Renormalize by incrementing the exponent and shifting our
1162 significand right one. However if we already have the
1163 maximum exponent we overflow to infinity. */
1164 if(exponent == semantics->maxExponent) {
1165 category = fcInfinity;
1167 return (opStatus) (opOverflow | opInexact);
1170 shiftSignificandRight(1);
1176 /* The normal case - we were and are not denormal, and any
1177 significand increment above didn't overflow. */
1178 if(omsb == semantics->precision)
1181 /* We have a non-zero denormal. */
1182 assert(omsb < semantics->precision);
1184 /* Canonicalize zeroes. */
1188 /* The fcZero case is a denormal that underflowed to zero. */
1189 return (opStatus) (opUnderflow | opInexact);
1193 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1195 switch(convolve(category, rhs.category)) {
1199 case convolve(fcNaN, fcZero):
1200 case convolve(fcNaN, fcNormal):
1201 case convolve(fcNaN, fcInfinity):
1202 case convolve(fcNaN, fcNaN):
1203 case convolve(fcNormal, fcZero):
1204 case convolve(fcInfinity, fcNormal):
1205 case convolve(fcInfinity, fcZero):
1208 case convolve(fcZero, fcNaN):
1209 case convolve(fcNormal, fcNaN):
1210 case convolve(fcInfinity, fcNaN):
1212 copySignificand(rhs);
1215 case convolve(fcNormal, fcInfinity):
1216 case convolve(fcZero, fcInfinity):
1217 category = fcInfinity;
1218 sign = rhs.sign ^ subtract;
1221 case convolve(fcZero, fcNormal):
1223 sign = rhs.sign ^ subtract;
1226 case convolve(fcZero, fcZero):
1227 /* Sign depends on rounding mode; handled by caller. */
1230 case convolve(fcInfinity, fcInfinity):
1231 /* Differently signed infinities can only be validly
1233 if((sign ^ rhs.sign) != subtract) {
1240 case convolve(fcNormal, fcNormal):
1245 /* Add or subtract two normal numbers. */
1247 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1250 lostFraction lost_fraction;
1253 /* Determine if the operation on the absolute values is effectively
1254 an addition or subtraction. */
1255 subtract ^= (sign ^ rhs.sign) ? true : false;
1257 /* Are we bigger exponent-wise than the RHS? */
1258 bits = exponent - rhs.exponent;
1260 /* Subtraction is more subtle than one might naively expect. */
1262 APFloat temp_rhs(rhs);
1266 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1267 lost_fraction = lfExactlyZero;
1268 } else if (bits > 0) {
1269 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1270 shiftSignificandLeft(1);
1273 lost_fraction = shiftSignificandRight(-bits - 1);
1274 temp_rhs.shiftSignificandLeft(1);
1279 carry = temp_rhs.subtractSignificand
1280 (*this, lost_fraction != lfExactlyZero);
1281 copySignificand(temp_rhs);
1284 carry = subtractSignificand
1285 (temp_rhs, lost_fraction != lfExactlyZero);
1288 /* Invert the lost fraction - it was on the RHS and
1290 if(lost_fraction == lfLessThanHalf)
1291 lost_fraction = lfMoreThanHalf;
1292 else if(lost_fraction == lfMoreThanHalf)
1293 lost_fraction = lfLessThanHalf;
1295 /* The code above is intended to ensure that no borrow is
1300 APFloat temp_rhs(rhs);
1302 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1303 carry = addSignificand(temp_rhs);
1305 lost_fraction = shiftSignificandRight(-bits);
1306 carry = addSignificand(rhs);
1309 /* We have a guard bit; generating a carry cannot happen. */
1313 return lost_fraction;
1317 APFloat::multiplySpecials(const APFloat &rhs)
1319 switch(convolve(category, rhs.category)) {
1323 case convolve(fcNaN, fcZero):
1324 case convolve(fcNaN, fcNormal):
1325 case convolve(fcNaN, fcInfinity):
1326 case convolve(fcNaN, fcNaN):
1329 case convolve(fcZero, fcNaN):
1330 case convolve(fcNormal, fcNaN):
1331 case convolve(fcInfinity, fcNaN):
1333 copySignificand(rhs);
1336 case convolve(fcNormal, fcInfinity):
1337 case convolve(fcInfinity, fcNormal):
1338 case convolve(fcInfinity, fcInfinity):
1339 category = fcInfinity;
1342 case convolve(fcZero, fcNormal):
1343 case convolve(fcNormal, fcZero):
1344 case convolve(fcZero, fcZero):
1348 case convolve(fcZero, fcInfinity):
1349 case convolve(fcInfinity, fcZero):
1353 case convolve(fcNormal, fcNormal):
1359 APFloat::divideSpecials(const APFloat &rhs)
1361 switch(convolve(category, rhs.category)) {
1365 case convolve(fcNaN, fcZero):
1366 case convolve(fcNaN, fcNormal):
1367 case convolve(fcNaN, fcInfinity):
1368 case convolve(fcNaN, fcNaN):
1369 case convolve(fcInfinity, fcZero):
1370 case convolve(fcInfinity, fcNormal):
1371 case convolve(fcZero, fcInfinity):
1372 case convolve(fcZero, fcNormal):
1375 case convolve(fcZero, fcNaN):
1376 case convolve(fcNormal, fcNaN):
1377 case convolve(fcInfinity, fcNaN):
1379 copySignificand(rhs);
1382 case convolve(fcNormal, fcInfinity):
1386 case convolve(fcNormal, fcZero):
1387 category = fcInfinity;
1390 case convolve(fcInfinity, fcInfinity):
1391 case convolve(fcZero, fcZero):
1395 case convolve(fcNormal, fcNormal):
1402 APFloat::changeSign()
1404 /* Look mummy, this one's easy. */
1409 APFloat::clearSign()
1411 /* So is this one. */
1416 APFloat::copySign(const APFloat &rhs)
1422 /* Normalized addition or subtraction. */
1424 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1429 assertArithmeticOK(*semantics);
1431 fs = addOrSubtractSpecials(rhs, subtract);
1433 /* This return code means it was not a simple case. */
1434 if(fs == opDivByZero) {
1435 lostFraction lost_fraction;
1437 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1438 fs = normalize(rounding_mode, lost_fraction);
1440 /* Can only be zero if we lost no fraction. */
1441 assert(category != fcZero || lost_fraction == lfExactlyZero);
1444 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1445 positive zero unless rounding to minus infinity, except that
1446 adding two like-signed zeroes gives that zero. */
1447 if(category == fcZero) {
1448 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1449 sign = (rounding_mode == rmTowardNegative);
1455 /* Normalized addition. */
1457 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1459 return addOrSubtract(rhs, rounding_mode, false);
1462 /* Normalized subtraction. */
1464 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1466 return addOrSubtract(rhs, rounding_mode, true);
1469 /* Normalized multiply. */
1471 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1475 assertArithmeticOK(*semantics);
1477 fs = multiplySpecials(rhs);
1479 if(category == fcNormal) {
1480 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1481 fs = normalize(rounding_mode, lost_fraction);
1482 if(lost_fraction != lfExactlyZero)
1483 fs = (opStatus) (fs | opInexact);
1489 /* Normalized divide. */
1491 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1495 assertArithmeticOK(*semantics);
1497 fs = divideSpecials(rhs);
1499 if(category == fcNormal) {
1500 lostFraction lost_fraction = divideSignificand(rhs);
1501 fs = normalize(rounding_mode, lost_fraction);
1502 if(lost_fraction != lfExactlyZero)
1503 fs = (opStatus) (fs | opInexact);
1509 /* Normalized remainder. This is not currently doing TRT. */
1511 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1515 unsigned int origSign = sign;
1517 assertArithmeticOK(*semantics);
1518 fs = V.divide(rhs, rmNearestTiesToEven);
1519 if (fs == opDivByZero)
1522 int parts = partCount();
1523 integerPart *x = new integerPart[parts];
1524 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1525 rmNearestTiesToEven);
1526 if (fs==opInvalidOp)
1529 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1530 rmNearestTiesToEven);
1531 assert(fs==opOK); // should always work
1533 fs = V.multiply(rhs, rounding_mode);
1534 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1536 fs = subtract(V, rounding_mode);
1537 assert(fs==opOK || fs==opInexact); // likewise
1540 sign = origSign; // IEEE754 requires this
1545 /* Normalized fused-multiply-add. */
1547 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1548 const APFloat &addend,
1549 roundingMode rounding_mode)
1553 assertArithmeticOK(*semantics);
1555 /* Post-multiplication sign, before addition. */
1556 sign ^= multiplicand.sign;
1558 /* If and only if all arguments are normal do we need to do an
1559 extended-precision calculation. */
1560 if(category == fcNormal
1561 && multiplicand.category == fcNormal
1562 && addend.category == fcNormal) {
1563 lostFraction lost_fraction;
1565 lost_fraction = multiplySignificand(multiplicand, &addend);
1566 fs = normalize(rounding_mode, lost_fraction);
1567 if(lost_fraction != lfExactlyZero)
1568 fs = (opStatus) (fs | opInexact);
1570 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1571 positive zero unless rounding to minus infinity, except that
1572 adding two like-signed zeroes gives that zero. */
1573 if(category == fcZero && sign != addend.sign)
1574 sign = (rounding_mode == rmTowardNegative);
1576 fs = multiplySpecials(multiplicand);
1578 /* FS can only be opOK or opInvalidOp. There is no more work
1579 to do in the latter case. The IEEE-754R standard says it is
1580 implementation-defined in this case whether, if ADDEND is a
1581 quiet NaN, we raise invalid op; this implementation does so.
1583 If we need to do the addition we can do so with normal
1586 fs = addOrSubtract(addend, rounding_mode, false);
1592 /* Comparison requires normalized numbers. */
1594 APFloat::compare(const APFloat &rhs) const
1598 assertArithmeticOK(*semantics);
1599 assert(semantics == rhs.semantics);
1601 switch(convolve(category, rhs.category)) {
1605 case convolve(fcNaN, fcZero):
1606 case convolve(fcNaN, fcNormal):
1607 case convolve(fcNaN, fcInfinity):
1608 case convolve(fcNaN, fcNaN):
1609 case convolve(fcZero, fcNaN):
1610 case convolve(fcNormal, fcNaN):
1611 case convolve(fcInfinity, fcNaN):
1612 return cmpUnordered;
1614 case convolve(fcInfinity, fcNormal):
1615 case convolve(fcInfinity, fcZero):
1616 case convolve(fcNormal, fcZero):
1620 return cmpGreaterThan;
1622 case convolve(fcNormal, fcInfinity):
1623 case convolve(fcZero, fcInfinity):
1624 case convolve(fcZero, fcNormal):
1626 return cmpGreaterThan;
1630 case convolve(fcInfinity, fcInfinity):
1631 if(sign == rhs.sign)
1636 return cmpGreaterThan;
1638 case convolve(fcZero, fcZero):
1641 case convolve(fcNormal, fcNormal):
1645 /* Two normal numbers. Do they have the same sign? */
1646 if(sign != rhs.sign) {
1648 result = cmpLessThan;
1650 result = cmpGreaterThan;
1652 /* Compare absolute values; invert result if negative. */
1653 result = compareAbsoluteValue(rhs);
1656 if(result == cmpLessThan)
1657 result = cmpGreaterThan;
1658 else if(result == cmpGreaterThan)
1659 result = cmpLessThan;
1667 APFloat::convert(const fltSemantics &toSemantics,
1668 roundingMode rounding_mode)
1670 lostFraction lostFraction;
1671 unsigned int newPartCount, oldPartCount;
1674 assertArithmeticOK(*semantics);
1675 lostFraction = lfExactlyZero;
1676 newPartCount = partCountForBits(toSemantics.precision + 1);
1677 oldPartCount = partCount();
1679 /* Handle storage complications. If our new form is wider,
1680 re-allocate our bit pattern into wider storage. If it is
1681 narrower, we ignore the excess parts, but if narrowing to a
1682 single part we need to free the old storage.
1683 Be careful not to reference significandParts for zeroes
1684 and infinities, since it aborts. */
1685 if (newPartCount > oldPartCount) {
1686 integerPart *newParts;
1687 newParts = new integerPart[newPartCount];
1688 APInt::tcSet(newParts, 0, newPartCount);
1689 if (category==fcNormal || category==fcNaN)
1690 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1692 significand.parts = newParts;
1693 } else if (newPartCount < oldPartCount) {
1694 /* Capture any lost fraction through truncation of parts so we get
1695 correct rounding whilst normalizing. */
1696 if (category==fcNormal)
1697 lostFraction = lostFractionThroughTruncation
1698 (significandParts(), oldPartCount, toSemantics.precision);
1699 if (newPartCount == 1) {
1700 integerPart newPart = 0;
1701 if (category==fcNormal || category==fcNaN)
1702 newPart = significandParts()[0];
1704 significand.part = newPart;
1708 if(category == fcNormal) {
1709 /* Re-interpret our bit-pattern. */
1710 exponent += toSemantics.precision - semantics->precision;
1711 semantics = &toSemantics;
1712 fs = normalize(rounding_mode, lostFraction);
1713 } else if (category == fcNaN) {
1714 int shift = toSemantics.precision - semantics->precision;
1715 // Do this now so significandParts gets the right answer
1716 semantics = &toSemantics;
1717 // No normalization here, just truncate
1719 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1721 APInt::tcShiftRight(significandParts(), newPartCount, -shift);
1722 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1723 // does not give you back the same bits. This is dubious, and we
1724 // don't currently do it. You're really supposed to get
1725 // an invalid operation signal at runtime, but nobody does that.
1728 semantics = &toSemantics;
1735 /* Convert a floating point number to an integer according to the
1736 rounding mode. If the rounded integer value is out of range this
1737 returns an invalid operation exception and the contents of the
1738 destination parts are unspecified. If the rounded value is in
1739 range but the floating point number is not the exact integer, the C
1740 standard doesn't require an inexact exception to be raised. IEEE
1741 854 does require it so we do that.
1743 Note that for conversions to integer type the C standard requires
1744 round-to-zero to always be used. */
1746 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1748 roundingMode rounding_mode) const
1750 lostFraction lost_fraction;
1751 const integerPart *src;
1752 unsigned int dstPartsCount, truncatedBits;
1754 assertArithmeticOK(*semantics);
1756 /* Handle the three special cases first. */
1757 if(category == fcInfinity || category == fcNaN)
1760 dstPartsCount = partCountForBits(width);
1762 if(category == fcZero) {
1763 APInt::tcSet(parts, 0, dstPartsCount);
1767 src = significandParts();
1769 /* Step 1: place our absolute value, with any fraction truncated, in
1772 /* Our absolute value is less than one; truncate everything. */
1773 APInt::tcSet(parts, 0, dstPartsCount);
1774 truncatedBits = semantics->precision;
1776 /* We want the most significant (exponent + 1) bits; the rest are
1778 unsigned int bits = exponent + 1U;
1780 /* Hopelessly large in magnitude? */
1784 if (bits < semantics->precision) {
1785 /* We truncate (semantics->precision - bits) bits. */
1786 truncatedBits = semantics->precision - bits;
1787 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1789 /* We want at least as many bits as are available. */
1790 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1791 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1796 /* Step 2: work out any lost fraction, and increment the absolute
1797 value if we would round away from zero. */
1798 if (truncatedBits) {
1799 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1801 if (lost_fraction != lfExactlyZero
1802 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1803 if (APInt::tcIncrement(parts, dstPartsCount))
1804 return opInvalidOp; /* Overflow. */
1807 lost_fraction = lfExactlyZero;
1810 /* Step 3: check if we fit in the destination. */
1811 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1815 /* Negative numbers cannot be represented as unsigned. */
1819 /* It takes omsb bits to represent the unsigned integer value.
1820 We lose a bit for the sign, but care is needed as the
1821 maximally negative integer is a special case. */
1822 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1825 /* This case can happen because of rounding. */
1830 APInt::tcNegate (parts, dstPartsCount);
1832 if (omsb >= width + !isSigned)
1836 if (lost_fraction == lfExactlyZero)
1842 /* Same as convertToSignExtendedInteger, except we provide
1843 deterministic values in case of an invalid operation exception,
1844 namely zero for NaNs and the minimal or maximal value respectively
1845 for underflow or overflow. */
1847 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1849 roundingMode rounding_mode) const
1853 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode);
1855 if (fs == opInvalidOp) {
1856 unsigned int bits, dstPartsCount;
1858 dstPartsCount = partCountForBits(width);
1860 if (category == fcNaN)
1865 bits = width - isSigned;
1867 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
1868 if (sign && isSigned)
1869 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
1875 /* Convert an unsigned integer SRC to a floating point number,
1876 rounding according to ROUNDING_MODE. The sign of the floating
1877 point number is not modified. */
1879 APFloat::convertFromUnsignedParts(const integerPart *src,
1880 unsigned int srcCount,
1881 roundingMode rounding_mode)
1883 unsigned int omsb, precision, dstCount;
1885 lostFraction lost_fraction;
1887 assertArithmeticOK(*semantics);
1888 category = fcNormal;
1889 omsb = APInt::tcMSB(src, srcCount) + 1;
1890 dst = significandParts();
1891 dstCount = partCount();
1892 precision = semantics->precision;
1894 /* We want the most significant PRECISON bits of SRC. There may not
1895 be that many; extract what we can. */
1896 if (precision <= omsb) {
1897 exponent = omsb - 1;
1898 lost_fraction = lostFractionThroughTruncation(src, srcCount,
1900 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
1902 exponent = precision - 1;
1903 lost_fraction = lfExactlyZero;
1904 APInt::tcExtract(dst, dstCount, src, omsb, 0);
1907 return normalize(rounding_mode, lost_fraction);
1910 /* Convert a two's complement integer SRC to a floating point number,
1911 rounding according to ROUNDING_MODE. ISSIGNED is true if the
1912 integer is signed, in which case it must be sign-extended. */
1914 APFloat::convertFromSignExtendedInteger(const integerPart *src,
1915 unsigned int srcCount,
1917 roundingMode rounding_mode)
1921 assertArithmeticOK(*semantics);
1923 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
1926 /* If we're signed and negative negate a copy. */
1928 copy = new integerPart[srcCount];
1929 APInt::tcAssign(copy, src, srcCount);
1930 APInt::tcNegate(copy, srcCount);
1931 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
1935 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
1941 /* FIXME: should this just take a const APInt reference? */
1943 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
1944 unsigned int width, bool isSigned,
1945 roundingMode rounding_mode)
1947 unsigned int partCount = partCountForBits(width);
1948 APInt api = APInt(width, partCount, parts);
1951 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1956 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
1960 APFloat::convertFromHexadecimalString(const char *p,
1961 roundingMode rounding_mode)
1963 lostFraction lost_fraction;
1964 integerPart *significand;
1965 unsigned int bitPos, partsCount;
1966 const char *dot, *firstSignificantDigit;
1970 category = fcNormal;
1972 significand = significandParts();
1973 partsCount = partCount();
1974 bitPos = partsCount * integerPartWidth;
1976 /* Skip leading zeroes and any (hexa)decimal point. */
1977 p = skipLeadingZeroesAndAnyDot(p, &dot);
1978 firstSignificantDigit = p;
1981 integerPart hex_value;
1988 hex_value = hexDigitValue(*p);
1989 if(hex_value == -1U) {
1990 lost_fraction = lfExactlyZero;
1996 /* Store the number whilst 4-bit nibbles remain. */
1999 hex_value <<= bitPos % integerPartWidth;
2000 significand[bitPos / integerPartWidth] |= hex_value;
2002 lost_fraction = trailingHexadecimalFraction(p, hex_value);
2003 while(hexDigitValue(*p) != -1U)
2009 /* Hex floats require an exponent but not a hexadecimal point. */
2010 assert(*p == 'p' || *p == 'P');
2012 /* Ignore the exponent if we are zero. */
2013 if(p != firstSignificantDigit) {
2016 /* Implicit hexadecimal point? */
2020 /* Calculate the exponent adjustment implicit in the number of
2021 significant digits. */
2022 expAdjustment = dot - firstSignificantDigit;
2023 if(expAdjustment < 0)
2025 expAdjustment = expAdjustment * 4 - 1;
2027 /* Adjust for writing the significand starting at the most
2028 significant nibble. */
2029 expAdjustment += semantics->precision;
2030 expAdjustment -= partsCount * integerPartWidth;
2032 /* Adjust for the given exponent. */
2033 exponent = totalExponent(p, expAdjustment);
2036 return normalize(rounding_mode, lost_fraction);
2040 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2041 unsigned sigPartCount, int exp,
2042 roundingMode rounding_mode)
2044 unsigned int parts, pow5PartCount;
2045 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2046 integerPart pow5Parts[maxPowerOfFiveParts];
2049 isNearest = (rounding_mode == rmNearestTiesToEven
2050 || rounding_mode == rmNearestTiesToAway);
2052 parts = partCountForBits(semantics->precision + 11);
2054 /* Calculate pow(5, abs(exp)). */
2055 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2057 for (;; parts *= 2) {
2058 opStatus sigStatus, powStatus;
2059 unsigned int excessPrecision, truncatedBits;
2061 calcSemantics.precision = parts * integerPartWidth - 1;
2062 excessPrecision = calcSemantics.precision - semantics->precision;
2063 truncatedBits = excessPrecision;
2065 APFloat decSig(calcSemantics, fcZero, sign);
2066 APFloat pow5(calcSemantics, fcZero, false);
2068 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2069 rmNearestTiesToEven);
2070 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2071 rmNearestTiesToEven);
2072 /* Add exp, as 10^n = 5^n * 2^n. */
2073 decSig.exponent += exp;
2075 lostFraction calcLostFraction;
2076 integerPart HUerr, HUdistance, powHUerr;
2079 /* multiplySignificand leaves the precision-th bit set to 1. */
2080 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2081 powHUerr = powStatus != opOK;
2083 calcLostFraction = decSig.divideSignificand(pow5);
2084 /* Denormal numbers have less precision. */
2085 if (decSig.exponent < semantics->minExponent) {
2086 excessPrecision += (semantics->minExponent - decSig.exponent);
2087 truncatedBits = excessPrecision;
2088 if (excessPrecision > calcSemantics.precision)
2089 excessPrecision = calcSemantics.precision;
2091 /* Extra half-ulp lost in reciprocal of exponent. */
2092 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0: 2;
2095 /* Both multiplySignificand and divideSignificand return the
2096 result with the integer bit set. */
2097 assert (APInt::tcExtractBit
2098 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2100 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2102 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2103 excessPrecision, isNearest);
2105 /* Are we guaranteed to round correctly if we truncate? */
2106 if (HUdistance >= HUerr) {
2107 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2108 calcSemantics.precision - excessPrecision,
2110 /* Take the exponent of decSig. If we tcExtract-ed less bits
2111 above we must adjust our exponent to compensate for the
2112 implicit right shift. */
2113 exponent = (decSig.exponent + semantics->precision
2114 - (calcSemantics.precision - excessPrecision));
2115 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2118 return normalize(rounding_mode, calcLostFraction);
2124 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2129 /* Scan the text. */
2130 interpretDecimal(p, &D);
2132 /* Handle the quick cases. First the case of no significant digits,
2133 i.e. zero, and then exponents that are obviously too large or too
2134 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2135 definitely overflows if
2137 (exp - 1) * L >= maxExponent
2139 and definitely underflows to zero where
2141 (exp + 1) * L <= minExponent - precision
2143 With integer arithmetic the tightest bounds for L are
2145 93/28 < L < 196/59 [ numerator <= 256 ]
2146 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2149 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2152 } else if ((D.normalizedExponent + 1) * 28738
2153 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2154 /* Underflow to zero and round. */
2156 fs = normalize(rounding_mode, lfLessThanHalf);
2157 } else if ((D.normalizedExponent - 1) * 42039
2158 >= 12655 * semantics->maxExponent) {
2159 /* Overflow and round. */
2160 fs = handleOverflow(rounding_mode);
2162 integerPart *decSignificand;
2163 unsigned int partCount;
2165 /* A tight upper bound on number of bits required to hold an
2166 N-digit decimal integer is N * 196 / 59. Allocate enough space
2167 to hold the full significand, and an extra part required by
2169 partCount = (D.lastSigDigit - D.firstSigDigit) + 1;
2170 partCount = partCountForBits(1 + 196 * partCount / 59);
2171 decSignificand = new integerPart[partCount + 1];
2174 /* Convert to binary efficiently - we do almost all multiplication
2175 in an integerPart. When this would overflow do we do a single
2176 bignum multiplication, and then revert again to multiplication
2177 in an integerPart. */
2179 integerPart decValue, val, multiplier;
2188 decValue = decDigitValue(*p++);
2190 val = val * 10 + decValue;
2191 /* The maximum number that can be multiplied by ten with any
2192 digit added without overflowing an integerPart. */
2193 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2195 /* Multiply out the current part. */
2196 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2197 partCount, partCount + 1, false);
2199 /* If we used another part (likely but not guaranteed), increase
2201 if (decSignificand[partCount])
2203 } while (p <= D.lastSigDigit);
2205 category = fcNormal;
2206 fs = roundSignificandWithExponent(decSignificand, partCount,
2207 D.exponent, rounding_mode);
2209 delete [] decSignificand;
2216 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2218 assertArithmeticOK(*semantics);
2220 /* Handle a leading minus sign. */
2226 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2227 return convertFromHexadecimalString(p + 2, rounding_mode);
2229 return convertFromDecimalString(p, rounding_mode);
2232 /* Write out a hexadecimal representation of the floating point value
2233 to DST, which must be of sufficient size, in the C99 form
2234 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2235 excluding the terminating NUL.
2237 If UPPERCASE, the output is in upper case, otherwise in lower case.
2239 HEXDIGITS digits appear altogether, rounding the value if
2240 necessary. If HEXDIGITS is 0, the minimal precision to display the
2241 number precisely is used instead. If nothing would appear after
2242 the decimal point it is suppressed.
2244 The decimal exponent is always printed and has at least one digit.
2245 Zero values display an exponent of zero. Infinities and NaNs
2246 appear as "infinity" or "nan" respectively.
2248 The above rules are as specified by C99. There is ambiguity about
2249 what the leading hexadecimal digit should be. This implementation
2250 uses whatever is necessary so that the exponent is displayed as
2251 stored. This implies the exponent will fall within the IEEE format
2252 range, and the leading hexadecimal digit will be 0 (for denormals),
2253 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2254 any other digits zero).
2257 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2258 bool upperCase, roundingMode rounding_mode) const
2262 assertArithmeticOK(*semantics);
2270 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2271 dst += sizeof infinityL - 1;
2275 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2276 dst += sizeof NaNU - 1;
2281 *dst++ = upperCase ? 'X': 'x';
2283 if (hexDigits > 1) {
2285 memset (dst, '0', hexDigits - 1);
2286 dst += hexDigits - 1;
2288 *dst++ = upperCase ? 'P': 'p';
2293 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2302 /* Does the hard work of outputting the correctly rounded hexadecimal
2303 form of a normal floating point number with the specified number of
2304 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2305 digits necessary to print the value precisely is output. */
2307 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2309 roundingMode rounding_mode) const
2311 unsigned int count, valueBits, shift, partsCount, outputDigits;
2312 const char *hexDigitChars;
2313 const integerPart *significand;
2318 *dst++ = upperCase ? 'X': 'x';
2321 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2323 significand = significandParts();
2324 partsCount = partCount();
2326 /* +3 because the first digit only uses the single integer bit, so
2327 we have 3 virtual zero most-significant-bits. */
2328 valueBits = semantics->precision + 3;
2329 shift = integerPartWidth - valueBits % integerPartWidth;
2331 /* The natural number of digits required ignoring trailing
2332 insignificant zeroes. */
2333 outputDigits = (valueBits - significandLSB () + 3) / 4;
2335 /* hexDigits of zero means use the required number for the
2336 precision. Otherwise, see if we are truncating. If we are,
2337 find out if we need to round away from zero. */
2339 if (hexDigits < outputDigits) {
2340 /* We are dropping non-zero bits, so need to check how to round.
2341 "bits" is the number of dropped bits. */
2343 lostFraction fraction;
2345 bits = valueBits - hexDigits * 4;
2346 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2347 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2349 outputDigits = hexDigits;
2352 /* Write the digits consecutively, and start writing in the location
2353 of the hexadecimal point. We move the most significant digit
2354 left and add the hexadecimal point later. */
2357 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2359 while (outputDigits && count) {
2362 /* Put the most significant integerPartWidth bits in "part". */
2363 if (--count == partsCount)
2364 part = 0; /* An imaginary higher zero part. */
2366 part = significand[count] << shift;
2369 part |= significand[count - 1] >> (integerPartWidth - shift);
2371 /* Convert as much of "part" to hexdigits as we can. */
2372 unsigned int curDigits = integerPartWidth / 4;
2374 if (curDigits > outputDigits)
2375 curDigits = outputDigits;
2376 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2377 outputDigits -= curDigits;
2383 /* Note that hexDigitChars has a trailing '0'. */
2386 *q = hexDigitChars[hexDigitValue (*q) + 1];
2387 } while (*q == '0');
2390 /* Add trailing zeroes. */
2391 memset (dst, '0', outputDigits);
2392 dst += outputDigits;
2395 /* Move the most significant digit to before the point, and if there
2396 is something after the decimal point add it. This must come
2397 after rounding above. */
2404 /* Finally output the exponent. */
2405 *dst++ = upperCase ? 'P': 'p';
2407 return writeSignedDecimal (dst, exponent);
2410 // For good performance it is desirable for different APFloats
2411 // to produce different integers.
2413 APFloat::getHashValue() const
2415 if (category==fcZero) return sign<<8 | semantics->precision ;
2416 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2417 else if (category==fcNaN) return 1<<10 | semantics->precision;
2419 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2420 const integerPart* p = significandParts();
2421 for (int i=partCount(); i>0; i--, p++)
2422 hash ^= ((uint32_t)*p) ^ (*p)>>32;
2427 // Conversion from APFloat to/from host float/double. It may eventually be
2428 // possible to eliminate these and have everybody deal with APFloats, but that
2429 // will take a while. This approach will not easily extend to long double.
2430 // Current implementation requires integerPartWidth==64, which is correct at
2431 // the moment but could be made more general.
2433 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2434 // the actual IEEE respresentations. We compensate for that here.
2437 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2439 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2440 assert (partCount()==2);
2442 uint64_t myexponent, mysignificand;
2444 if (category==fcNormal) {
2445 myexponent = exponent+16383; //bias
2446 mysignificand = significandParts()[0];
2447 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2448 myexponent = 0; // denormal
2449 } else if (category==fcZero) {
2452 } else if (category==fcInfinity) {
2453 myexponent = 0x7fff;
2454 mysignificand = 0x8000000000000000ULL;
2456 assert(category == fcNaN && "Unknown category");
2457 myexponent = 0x7fff;
2458 mysignificand = significandParts()[0];
2462 words[0] = (((uint64_t)sign & 1) << 63) |
2463 ((myexponent & 0x7fff) << 48) |
2464 ((mysignificand >>16) & 0xffffffffffffLL);
2465 words[1] = mysignificand & 0xffff;
2466 return APInt(80, 2, words);
2470 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2472 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2473 assert (partCount()==2);
2475 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2477 if (category==fcNormal) {
2478 myexponent = exponent + 1023; //bias
2479 myexponent2 = exponent2 + 1023;
2480 mysignificand = significandParts()[0];
2481 mysignificand2 = significandParts()[1];
2482 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2483 myexponent = 0; // denormal
2484 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2485 myexponent2 = 0; // denormal
2486 } else if (category==fcZero) {
2491 } else if (category==fcInfinity) {
2497 assert(category == fcNaN && "Unknown category");
2499 mysignificand = significandParts()[0];
2500 myexponent2 = exponent2;
2501 mysignificand2 = significandParts()[1];
2505 words[0] = (((uint64_t)sign & 1) << 63) |
2506 ((myexponent & 0x7ff) << 52) |
2507 (mysignificand & 0xfffffffffffffLL);
2508 words[1] = (((uint64_t)sign2 & 1) << 63) |
2509 ((myexponent2 & 0x7ff) << 52) |
2510 (mysignificand2 & 0xfffffffffffffLL);
2511 return APInt(128, 2, words);
2515 APFloat::convertDoubleAPFloatToAPInt() const
2517 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2518 assert (partCount()==1);
2520 uint64_t myexponent, mysignificand;
2522 if (category==fcNormal) {
2523 myexponent = exponent+1023; //bias
2524 mysignificand = *significandParts();
2525 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2526 myexponent = 0; // denormal
2527 } else if (category==fcZero) {
2530 } else if (category==fcInfinity) {
2534 assert(category == fcNaN && "Unknown category!");
2536 mysignificand = *significandParts();
2539 return APInt(64, (((((uint64_t)sign & 1) << 63) |
2540 ((myexponent & 0x7ff) << 52) |
2541 (mysignificand & 0xfffffffffffffLL))));
2545 APFloat::convertFloatAPFloatToAPInt() const
2547 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2548 assert (partCount()==1);
2550 uint32_t myexponent, mysignificand;
2552 if (category==fcNormal) {
2553 myexponent = exponent+127; //bias
2554 mysignificand = *significandParts();
2555 if (myexponent == 1 && !(mysignificand & 0x800000))
2556 myexponent = 0; // denormal
2557 } else if (category==fcZero) {
2560 } else if (category==fcInfinity) {
2564 assert(category == fcNaN && "Unknown category!");
2566 mysignificand = *significandParts();
2569 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2570 (mysignificand & 0x7fffff)));
2573 // This function creates an APInt that is just a bit map of the floating
2574 // point constant as it would appear in memory. It is not a conversion,
2575 // and treating the result as a normal integer is unlikely to be useful.
2578 APFloat::convertToAPInt() const
2580 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2581 return convertFloatAPFloatToAPInt();
2583 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2584 return convertDoubleAPFloatToAPInt();
2586 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2587 return convertPPCDoubleDoubleAPFloatToAPInt();
2589 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2591 return convertF80LongDoubleAPFloatToAPInt();
2595 APFloat::convertToFloat() const
2597 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2598 APInt api = convertToAPInt();
2599 return api.bitsToFloat();
2603 APFloat::convertToDouble() const
2605 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2606 APInt api = convertToAPInt();
2607 return api.bitsToDouble();
2610 /// Integer bit is explicit in this format. Current Intel book does not
2611 /// define meaning of:
2612 /// exponent = all 1's, integer bit not set.
2613 /// exponent = 0, integer bit set. (formerly "psuedodenormals")
2614 /// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals")
2616 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2618 assert(api.getBitWidth()==80);
2619 uint64_t i1 = api.getRawData()[0];
2620 uint64_t i2 = api.getRawData()[1];
2621 uint64_t myexponent = (i1 >> 48) & 0x7fff;
2622 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2625 initialize(&APFloat::x87DoubleExtended);
2626 assert(partCount()==2);
2629 if (myexponent==0 && mysignificand==0) {
2630 // exponent, significand meaningless
2632 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2633 // exponent, significand meaningless
2634 category = fcInfinity;
2635 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2636 // exponent meaningless
2638 significandParts()[0] = mysignificand;
2639 significandParts()[1] = 0;
2641 category = fcNormal;
2642 exponent = myexponent - 16383;
2643 significandParts()[0] = mysignificand;
2644 significandParts()[1] = 0;
2645 if (myexponent==0) // denormal
2651 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2653 assert(api.getBitWidth()==128);
2654 uint64_t i1 = api.getRawData()[0];
2655 uint64_t i2 = api.getRawData()[1];
2656 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2657 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2658 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2659 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2661 initialize(&APFloat::PPCDoubleDouble);
2662 assert(partCount()==2);
2666 if (myexponent==0 && mysignificand==0) {
2667 // exponent, significand meaningless
2668 // exponent2 and significand2 are required to be 0; we don't check
2670 } else if (myexponent==0x7ff && mysignificand==0) {
2671 // exponent, significand meaningless
2672 // exponent2 and significand2 are required to be 0; we don't check
2673 category = fcInfinity;
2674 } else if (myexponent==0x7ff && mysignificand!=0) {
2675 // exponent meaningless. So is the whole second word, but keep it
2678 exponent2 = myexponent2;
2679 significandParts()[0] = mysignificand;
2680 significandParts()[1] = mysignificand2;
2682 category = fcNormal;
2683 // Note there is no category2; the second word is treated as if it is
2684 // fcNormal, although it might be something else considered by itself.
2685 exponent = myexponent - 1023;
2686 exponent2 = myexponent2 - 1023;
2687 significandParts()[0] = mysignificand;
2688 significandParts()[1] = mysignificand2;
2689 if (myexponent==0) // denormal
2692 significandParts()[0] |= 0x10000000000000LL; // integer bit
2696 significandParts()[1] |= 0x10000000000000LL; // integer bit
2701 APFloat::initFromDoubleAPInt(const APInt &api)
2703 assert(api.getBitWidth()==64);
2704 uint64_t i = *api.getRawData();
2705 uint64_t myexponent = (i >> 52) & 0x7ff;
2706 uint64_t mysignificand = i & 0xfffffffffffffLL;
2708 initialize(&APFloat::IEEEdouble);
2709 assert(partCount()==1);
2712 if (myexponent==0 && mysignificand==0) {
2713 // exponent, significand meaningless
2715 } else if (myexponent==0x7ff && mysignificand==0) {
2716 // exponent, significand meaningless
2717 category = fcInfinity;
2718 } else if (myexponent==0x7ff && mysignificand!=0) {
2719 // exponent meaningless
2721 *significandParts() = mysignificand;
2723 category = fcNormal;
2724 exponent = myexponent - 1023;
2725 *significandParts() = mysignificand;
2726 if (myexponent==0) // denormal
2729 *significandParts() |= 0x10000000000000LL; // integer bit
2734 APFloat::initFromFloatAPInt(const APInt & api)
2736 assert(api.getBitWidth()==32);
2737 uint32_t i = (uint32_t)*api.getRawData();
2738 uint32_t myexponent = (i >> 23) & 0xff;
2739 uint32_t mysignificand = i & 0x7fffff;
2741 initialize(&APFloat::IEEEsingle);
2742 assert(partCount()==1);
2745 if (myexponent==0 && mysignificand==0) {
2746 // exponent, significand meaningless
2748 } else if (myexponent==0xff && mysignificand==0) {
2749 // exponent, significand meaningless
2750 category = fcInfinity;
2751 } else if (myexponent==0xff && mysignificand!=0) {
2752 // sign, exponent, significand meaningless
2754 *significandParts() = mysignificand;
2756 category = fcNormal;
2757 exponent = myexponent - 127; //bias
2758 *significandParts() = mysignificand;
2759 if (myexponent==0) // denormal
2762 *significandParts() |= 0x800000; // integer bit
2766 /// Treat api as containing the bits of a floating point number. Currently
2767 /// we infer the floating point type from the size of the APInt. The
2768 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2769 /// when the size is anything else).
2771 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2773 if (api.getBitWidth() == 32)
2774 return initFromFloatAPInt(api);
2775 else if (api.getBitWidth()==64)
2776 return initFromDoubleAPInt(api);
2777 else if (api.getBitWidth()==80)
2778 return initFromF80LongDoubleAPInt(api);
2779 else if (api.getBitWidth()==128 && !isIEEE)
2780 return initFromPPCDoubleDoubleAPInt(api);
2785 APFloat::APFloat(const APInt& api, bool isIEEE)
2787 initFromAPInt(api, isIEEE);
2790 APFloat::APFloat(float f)
2792 APInt api = APInt(32, 0);
2793 initFromAPInt(api.floatToBits(f));
2796 APFloat::APFloat(double d)
2798 APInt api = APInt(64, 0);
2799 initFromAPInt(api.doubleToBits(d));