1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/MathExtras.h"
25 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
27 /* Assumed in hexadecimal significand parsing, and conversion to
28 hexadecimal strings. */
29 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
30 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
34 /* Represents floating point arithmetic semantics. */
36 /* The largest E such that 2^E is representable; this matches the
37 definition of IEEE 754. */
38 exponent_t maxExponent;
40 /* The smallest E such that 2^E is a normalized number; this
41 matches the definition of IEEE 754. */
42 exponent_t minExponent;
44 /* Number of bits in the significand. This includes the integer
46 unsigned int precision;
48 /* True if arithmetic is supported. */
49 unsigned int arithmeticOK;
52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
57 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
59 // The PowerPC format consists of two doubles. It does not map cleanly
60 // onto the usual format above. For now only storage of constants of
61 // this type is supported, no arithmetic.
62 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
64 /* A tight upper bound on number of parts required to hold the value
67 power * 815 / (351 * integerPartWidth) + 1
69 However, whilst the result may require only this many parts,
70 because we are multiplying two values to get it, the
71 multiplication may require an extra part with the excess part
72 being zero (consider the trivial case of 1 * 1, tcFullMultiply
73 requires two parts to hold the single-part result). So we add an
74 extra one to guarantee enough space whilst multiplying. */
75 const unsigned int maxExponent = 16383;
76 const unsigned int maxPrecision = 113;
77 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
78 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
79 / (351 * integerPartWidth));
82 /* A bunch of private, handy routines. */
84 static inline unsigned int
85 partCountForBits(unsigned int bits)
87 return ((bits) + integerPartWidth - 1) / integerPartWidth;
90 /* Returns 0U-9U. Return values >= 10U are not digits. */
91 static inline unsigned int
92 decDigitValue(unsigned int c)
98 hexDigitValue(unsigned int c)
118 assertArithmeticOK(const llvm::fltSemantics &semantics) {
119 assert(semantics.arithmeticOK
120 && "Compile-time arithmetic does not support these semantics");
123 /* Return the value of a decimal exponent of the form
126 If the exponent overflows, returns a large exponent with the
129 readExponent(StringRef::iterator begin, StringRef::iterator end)
132 unsigned int absExponent;
133 const unsigned int overlargeExponent = 24000; /* FIXME. */
134 StringRef::iterator p = begin;
136 assert(p != end && "Exponent has no digits");
138 isNegative = (*p == '-');
139 if (*p == '-' || *p == '+') {
141 assert(p != end && "Exponent has no digits");
144 absExponent = decDigitValue(*p++);
145 assert(absExponent < 10U && "Invalid character in exponent");
147 for (; p != end; ++p) {
150 value = decDigitValue(*p);
151 assert(value < 10U && "Invalid character in exponent");
153 value += absExponent * 10;
154 if (absExponent >= overlargeExponent) {
155 absExponent = overlargeExponent;
161 assert(p == end && "Invalid exponent in exponent");
164 return -(int) absExponent;
166 return (int) absExponent;
169 /* This is ugly and needs cleaning up, but I don't immediately see
170 how whilst remaining safe. */
172 totalExponent(StringRef::iterator p, StringRef::iterator end,
173 int exponentAdjustment)
175 int unsignedExponent;
176 bool negative, overflow;
179 assert(p != end && "Exponent has no digits");
181 negative = *p == '-';
182 if(*p == '-' || *p == '+') {
184 assert(p != end && "Exponent has no digits");
187 unsignedExponent = 0;
189 for(; p != end; ++p) {
192 value = decDigitValue(*p);
193 assert(value < 10U && "Invalid character in exponent");
195 unsignedExponent = unsignedExponent * 10 + value;
196 if(unsignedExponent > 65535)
200 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
204 exponent = unsignedExponent;
206 exponent = -exponent;
207 exponent += exponentAdjustment;
208 if(exponent > 65535 || exponent < -65536)
213 exponent = negative ? -65536: 65535;
218 static StringRef::iterator
219 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
220 StringRef::iterator *dot)
222 StringRef::iterator p = begin;
224 while(*p == '0' && p != end)
230 assert(end - begin != 1 && "Significand has no digits");
232 while(*p == '0' && p != end)
239 /* Given a normal decimal floating point number of the form
243 where the decimal point and exponent are optional, fill out the
244 structure D. Exponent is appropriate if the significand is
245 treated as an integer, and normalizedExponent if the significand
246 is taken to have the decimal point after a single leading
249 If the value is zero, V->firstSigDigit points to a non-digit, and
250 the return exponent is zero.
253 const char *firstSigDigit;
254 const char *lastSigDigit;
256 int normalizedExponent;
260 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
263 StringRef::iterator dot = end;
264 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
266 D->firstSigDigit = p;
268 D->normalizedExponent = 0;
270 for (; p != end; ++p) {
272 assert(dot == end && "String contains multiple dots");
277 if (decDigitValue(*p) >= 10U)
282 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
283 assert(p != begin && "Significand has no digits");
284 assert((dot == end || p - begin != 1) && "Significand has no digits");
286 /* p points to the first non-digit in the string */
287 D->exponent = readExponent(p + 1, end);
289 /* Implied decimal point? */
294 /* If number is all zeroes accept any exponent. */
295 if (p != D->firstSigDigit) {
296 /* Drop insignificant trailing zeroes. */
301 while (p != begin && *p == '0');
302 while (p != begin && *p == '.');
305 /* Adjust the exponents for any decimal point. */
306 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
307 D->normalizedExponent = (D->exponent +
308 static_cast<exponent_t>((p - D->firstSigDigit)
309 - (dot > D->firstSigDigit && dot < p)));
315 /* Return the trailing fraction of a hexadecimal number.
316 DIGITVALUE is the first hex digit of the fraction, P points to
319 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
320 unsigned int digitValue)
322 unsigned int hexDigit;
324 /* If the first trailing digit isn't 0 or 8 we can work out the
325 fraction immediately. */
327 return lfMoreThanHalf;
328 else if(digitValue < 8 && digitValue > 0)
329 return lfLessThanHalf;
331 /* Otherwise we need to find the first non-zero digit. */
335 assert(p != end && "Invalid trailing hexadecimal fraction!");
337 hexDigit = hexDigitValue(*p);
339 /* If we ran off the end it is exactly zero or one-half, otherwise
342 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
344 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
347 /* Return the fraction lost were a bignum truncated losing the least
348 significant BITS bits. */
350 lostFractionThroughTruncation(const integerPart *parts,
351 unsigned int partCount,
356 lsb = APInt::tcLSB(parts, partCount);
358 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
360 return lfExactlyZero;
362 return lfExactlyHalf;
363 if(bits <= partCount * integerPartWidth
364 && APInt::tcExtractBit(parts, bits - 1))
365 return lfMoreThanHalf;
367 return lfLessThanHalf;
370 /* Shift DST right BITS bits noting lost fraction. */
372 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
374 lostFraction lost_fraction;
376 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
378 APInt::tcShiftRight(dst, parts, bits);
380 return lost_fraction;
383 /* Combine the effect of two lost fractions. */
385 combineLostFractions(lostFraction moreSignificant,
386 lostFraction lessSignificant)
388 if(lessSignificant != lfExactlyZero) {
389 if(moreSignificant == lfExactlyZero)
390 moreSignificant = lfLessThanHalf;
391 else if(moreSignificant == lfExactlyHalf)
392 moreSignificant = lfMoreThanHalf;
395 return moreSignificant;
398 /* The error from the true value, in half-ulps, on multiplying two
399 floating point numbers, which differ from the value they
400 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
401 than the returned value.
403 See "How to Read Floating Point Numbers Accurately" by William D
406 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
408 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
410 if (HUerr1 + HUerr2 == 0)
411 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
413 return inexactMultiply + 2 * (HUerr1 + HUerr2);
416 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
417 when the least significant BITS are truncated. BITS cannot be
420 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
422 unsigned int count, partBits;
423 integerPart part, boundary;
428 count = bits / integerPartWidth;
429 partBits = bits % integerPartWidth + 1;
431 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
434 boundary = (integerPart) 1 << (partBits - 1);
439 if (part - boundary <= boundary - part)
440 return part - boundary;
442 return boundary - part;
445 if (part == boundary) {
448 return ~(integerPart) 0; /* A lot. */
451 } else if (part == boundary - 1) {
454 return ~(integerPart) 0; /* A lot. */
459 return ~(integerPart) 0; /* A lot. */
462 /* Place pow(5, power) in DST, and return the number of parts used.
463 DST must be at least one part larger than size of the answer. */
465 powerOf5(integerPart *dst, unsigned int power)
467 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
469 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
470 pow5s[0] = 78125 * 5;
472 unsigned int partsCount[16] = { 1 };
473 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
475 assert(power <= maxExponent);
480 *p1 = firstEightPowers[power & 7];
486 for (unsigned int n = 0; power; power >>= 1, n++) {
491 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
493 pc = partsCount[n - 1];
494 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
496 if (pow5[pc - 1] == 0)
504 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
506 if (p2[result - 1] == 0)
509 /* Now result is in p1 with partsCount parts and p2 is scratch
511 tmp = p1, p1 = p2, p2 = tmp;
518 APInt::tcAssign(dst, p1, result);
523 /* Zero at the end to avoid modular arithmetic when adding one; used
524 when rounding up during hexadecimal output. */
525 static const char hexDigitsLower[] = "0123456789abcdef0";
526 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
527 static const char infinityL[] = "infinity";
528 static const char infinityU[] = "INFINITY";
529 static const char NaNL[] = "nan";
530 static const char NaNU[] = "NAN";
532 /* Write out an integerPart in hexadecimal, starting with the most
533 significant nibble. Write out exactly COUNT hexdigits, return
536 partAsHex (char *dst, integerPart part, unsigned int count,
537 const char *hexDigitChars)
539 unsigned int result = count;
541 assert(count != 0 && count <= integerPartWidth / 4);
543 part >>= (integerPartWidth - 4 * count);
545 dst[count] = hexDigitChars[part & 0xf];
552 /* Write out an unsigned decimal integer. */
554 writeUnsignedDecimal (char *dst, unsigned int n)
570 /* Write out a signed decimal integer. */
572 writeSignedDecimal (char *dst, int value)
576 dst = writeUnsignedDecimal(dst, -(unsigned) value);
578 dst = writeUnsignedDecimal(dst, value);
585 APFloat::initialize(const fltSemantics *ourSemantics)
589 semantics = ourSemantics;
592 significand.parts = new integerPart[count];
596 APFloat::freeSignificand()
599 delete [] significand.parts;
603 APFloat::assign(const APFloat &rhs)
605 assert(semantics == rhs.semantics);
608 category = rhs.category;
609 exponent = rhs.exponent;
611 exponent2 = rhs.exponent2;
612 if(category == fcNormal || category == fcNaN)
613 copySignificand(rhs);
617 APFloat::copySignificand(const APFloat &rhs)
619 assert(category == fcNormal || category == fcNaN);
620 assert(rhs.partCount() >= partCount());
622 APInt::tcAssign(significandParts(), rhs.significandParts(),
626 /* Make this number a NaN, with an arbitrary but deterministic value
627 for the significand. If double or longer, this is a signalling NaN,
628 which may not be ideal. If float, this is QNaN(0). */
630 APFloat::makeNaN(unsigned type)
633 // FIXME: Add double and long double support for QNaN(0).
634 if (semantics->precision == 24 && semantics->maxExponent == 127) {
636 type &= ~0x80000000U;
639 APInt::tcSet(significandParts(), type, partCount());
643 APFloat::operator=(const APFloat &rhs)
646 if(semantics != rhs.semantics) {
648 initialize(rhs.semantics);
657 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
660 if (semantics != rhs.semantics ||
661 category != rhs.category ||
664 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
667 if (category==fcZero || category==fcInfinity)
669 else if (category==fcNormal && exponent!=rhs.exponent)
671 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
672 exponent2!=rhs.exponent2)
676 const integerPart* p=significandParts();
677 const integerPart* q=rhs.significandParts();
678 for (; i>0; i--, p++, q++) {
686 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
688 assertArithmeticOK(ourSemantics);
689 initialize(&ourSemantics);
692 exponent = ourSemantics.precision - 1;
693 significandParts()[0] = value;
694 normalize(rmNearestTiesToEven, lfExactlyZero);
697 APFloat::APFloat(const fltSemantics &ourSemantics) {
698 assertArithmeticOK(ourSemantics);
699 initialize(&ourSemantics);
705 APFloat::APFloat(const fltSemantics &ourSemantics,
706 fltCategory ourCategory, bool negative, unsigned type)
708 assertArithmeticOK(ourSemantics);
709 initialize(&ourSemantics);
710 category = ourCategory;
712 if (category == fcNormal)
714 else if (ourCategory == fcNaN)
718 APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
720 assertArithmeticOK(ourSemantics);
721 initialize(&ourSemantics);
722 convertFromString(text, rmNearestTiesToEven);
725 APFloat::APFloat(const APFloat &rhs)
727 initialize(rhs.semantics);
736 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
737 void APFloat::Profile(FoldingSetNodeID& ID) const {
738 ID.Add(bitcastToAPInt());
742 APFloat::partCount() const
744 return partCountForBits(semantics->precision + 1);
748 APFloat::semanticsPrecision(const fltSemantics &semantics)
750 return semantics.precision;
754 APFloat::significandParts() const
756 return const_cast<APFloat *>(this)->significandParts();
760 APFloat::significandParts()
762 assert(category == fcNormal || category == fcNaN);
765 return significand.parts;
767 return &significand.part;
771 APFloat::zeroSignificand()
774 APInt::tcSet(significandParts(), 0, partCount());
777 /* Increment an fcNormal floating point number's significand. */
779 APFloat::incrementSignificand()
783 carry = APInt::tcIncrement(significandParts(), partCount());
785 /* Our callers should never cause us to overflow. */
789 /* Add the significand of the RHS. Returns the carry flag. */
791 APFloat::addSignificand(const APFloat &rhs)
795 parts = significandParts();
797 assert(semantics == rhs.semantics);
798 assert(exponent == rhs.exponent);
800 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
803 /* Subtract the significand of the RHS with a borrow flag. Returns
806 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
810 parts = significandParts();
812 assert(semantics == rhs.semantics);
813 assert(exponent == rhs.exponent);
815 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
819 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
820 on to the full-precision result of the multiplication. Returns the
823 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
825 unsigned int omsb; // One, not zero, based MSB.
826 unsigned int partsCount, newPartsCount, precision;
827 integerPart *lhsSignificand;
828 integerPart scratch[4];
829 integerPart *fullSignificand;
830 lostFraction lost_fraction;
833 assert(semantics == rhs.semantics);
835 precision = semantics->precision;
836 newPartsCount = partCountForBits(precision * 2);
838 if(newPartsCount > 4)
839 fullSignificand = new integerPart[newPartsCount];
841 fullSignificand = scratch;
843 lhsSignificand = significandParts();
844 partsCount = partCount();
846 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
847 rhs.significandParts(), partsCount, partsCount);
849 lost_fraction = lfExactlyZero;
850 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
851 exponent += rhs.exponent;
854 Significand savedSignificand = significand;
855 const fltSemantics *savedSemantics = semantics;
856 fltSemantics extendedSemantics;
858 unsigned int extendedPrecision;
860 /* Normalize our MSB. */
861 extendedPrecision = precision + precision - 1;
862 if(omsb != extendedPrecision)
864 APInt::tcShiftLeft(fullSignificand, newPartsCount,
865 extendedPrecision - omsb);
866 exponent -= extendedPrecision - omsb;
869 /* Create new semantics. */
870 extendedSemantics = *semantics;
871 extendedSemantics.precision = extendedPrecision;
873 if(newPartsCount == 1)
874 significand.part = fullSignificand[0];
876 significand.parts = fullSignificand;
877 semantics = &extendedSemantics;
879 APFloat extendedAddend(*addend);
880 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
881 assert(status == opOK);
882 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
884 /* Restore our state. */
885 if(newPartsCount == 1)
886 fullSignificand[0] = significand.part;
887 significand = savedSignificand;
888 semantics = savedSemantics;
890 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
893 exponent -= (precision - 1);
895 if(omsb > precision) {
896 unsigned int bits, significantParts;
899 bits = omsb - precision;
900 significantParts = partCountForBits(omsb);
901 lf = shiftRight(fullSignificand, significantParts, bits);
902 lost_fraction = combineLostFractions(lf, lost_fraction);
906 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
908 if(newPartsCount > 4)
909 delete [] fullSignificand;
911 return lost_fraction;
914 /* Multiply the significands of LHS and RHS to DST. */
916 APFloat::divideSignificand(const APFloat &rhs)
918 unsigned int bit, i, partsCount;
919 const integerPart *rhsSignificand;
920 integerPart *lhsSignificand, *dividend, *divisor;
921 integerPart scratch[4];
922 lostFraction lost_fraction;
924 assert(semantics == rhs.semantics);
926 lhsSignificand = significandParts();
927 rhsSignificand = rhs.significandParts();
928 partsCount = partCount();
931 dividend = new integerPart[partsCount * 2];
935 divisor = dividend + partsCount;
937 /* Copy the dividend and divisor as they will be modified in-place. */
938 for(i = 0; i < partsCount; i++) {
939 dividend[i] = lhsSignificand[i];
940 divisor[i] = rhsSignificand[i];
941 lhsSignificand[i] = 0;
944 exponent -= rhs.exponent;
946 unsigned int precision = semantics->precision;
948 /* Normalize the divisor. */
949 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
952 APInt::tcShiftLeft(divisor, partsCount, bit);
955 /* Normalize the dividend. */
956 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
959 APInt::tcShiftLeft(dividend, partsCount, bit);
962 /* Ensure the dividend >= divisor initially for the loop below.
963 Incidentally, this means that the division loop below is
964 guaranteed to set the integer bit to one. */
965 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
967 APInt::tcShiftLeft(dividend, partsCount, 1);
968 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
972 for(bit = precision; bit; bit -= 1) {
973 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
974 APInt::tcSubtract(dividend, divisor, 0, partsCount);
975 APInt::tcSetBit(lhsSignificand, bit - 1);
978 APInt::tcShiftLeft(dividend, partsCount, 1);
981 /* Figure out the lost fraction. */
982 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
985 lost_fraction = lfMoreThanHalf;
987 lost_fraction = lfExactlyHalf;
988 else if(APInt::tcIsZero(dividend, partsCount))
989 lost_fraction = lfExactlyZero;
991 lost_fraction = lfLessThanHalf;
996 return lost_fraction;
1000 APFloat::significandMSB() const
1002 return APInt::tcMSB(significandParts(), partCount());
1006 APFloat::significandLSB() const
1008 return APInt::tcLSB(significandParts(), partCount());
1011 /* Note that a zero result is NOT normalized to fcZero. */
1013 APFloat::shiftSignificandRight(unsigned int bits)
1015 /* Our exponent should not overflow. */
1016 assert((exponent_t) (exponent + bits) >= exponent);
1020 return shiftRight(significandParts(), partCount(), bits);
1023 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1025 APFloat::shiftSignificandLeft(unsigned int bits)
1027 assert(bits < semantics->precision);
1030 unsigned int partsCount = partCount();
1032 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1035 assert(!APInt::tcIsZero(significandParts(), partsCount));
1040 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1044 assert(semantics == rhs.semantics);
1045 assert(category == fcNormal);
1046 assert(rhs.category == fcNormal);
1048 compare = exponent - rhs.exponent;
1050 /* If exponents are equal, do an unsigned bignum comparison of the
1053 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1057 return cmpGreaterThan;
1058 else if(compare < 0)
1064 /* Handle overflow. Sign is preserved. We either become infinity or
1065 the largest finite number. */
1067 APFloat::handleOverflow(roundingMode rounding_mode)
1070 if(rounding_mode == rmNearestTiesToEven
1071 || rounding_mode == rmNearestTiesToAway
1072 || (rounding_mode == rmTowardPositive && !sign)
1073 || (rounding_mode == rmTowardNegative && sign))
1075 category = fcInfinity;
1076 return (opStatus) (opOverflow | opInexact);
1079 /* Otherwise we become the largest finite number. */
1080 category = fcNormal;
1081 exponent = semantics->maxExponent;
1082 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1083 semantics->precision);
1088 /* Returns TRUE if, when truncating the current number, with BIT the
1089 new LSB, with the given lost fraction and rounding mode, the result
1090 would need to be rounded away from zero (i.e., by increasing the
1091 signficand). This routine must work for fcZero of both signs, and
1092 fcNormal numbers. */
1094 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1095 lostFraction lost_fraction,
1096 unsigned int bit) const
1098 /* NaNs and infinities should not have lost fractions. */
1099 assert(category == fcNormal || category == fcZero);
1101 /* Current callers never pass this so we don't handle it. */
1102 assert(lost_fraction != lfExactlyZero);
1104 switch (rounding_mode) {
1106 llvm_unreachable(0);
1108 case rmNearestTiesToAway:
1109 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1111 case rmNearestTiesToEven:
1112 if(lost_fraction == lfMoreThanHalf)
1115 /* Our zeroes don't have a significand to test. */
1116 if(lost_fraction == lfExactlyHalf && category != fcZero)
1117 return APInt::tcExtractBit(significandParts(), bit);
1124 case rmTowardPositive:
1125 return sign == false;
1127 case rmTowardNegative:
1128 return sign == true;
1133 APFloat::normalize(roundingMode rounding_mode,
1134 lostFraction lost_fraction)
1136 unsigned int omsb; /* One, not zero, based MSB. */
1139 if(category != fcNormal)
1142 /* Before rounding normalize the exponent of fcNormal numbers. */
1143 omsb = significandMSB() + 1;
1146 /* OMSB is numbered from 1. We want to place it in the integer
1147 bit numbered PRECISON if possible, with a compensating change in
1149 exponentChange = omsb - semantics->precision;
1151 /* If the resulting exponent is too high, overflow according to
1152 the rounding mode. */
1153 if(exponent + exponentChange > semantics->maxExponent)
1154 return handleOverflow(rounding_mode);
1156 /* Subnormal numbers have exponent minExponent, and their MSB
1157 is forced based on that. */
1158 if(exponent + exponentChange < semantics->minExponent)
1159 exponentChange = semantics->minExponent - exponent;
1161 /* Shifting left is easy as we don't lose precision. */
1162 if(exponentChange < 0) {
1163 assert(lost_fraction == lfExactlyZero);
1165 shiftSignificandLeft(-exponentChange);
1170 if(exponentChange > 0) {
1173 /* Shift right and capture any new lost fraction. */
1174 lf = shiftSignificandRight(exponentChange);
1176 lost_fraction = combineLostFractions(lf, lost_fraction);
1178 /* Keep OMSB up-to-date. */
1179 if(omsb > (unsigned) exponentChange)
1180 omsb -= exponentChange;
1186 /* Now round the number according to rounding_mode given the lost
1189 /* As specified in IEEE 754, since we do not trap we do not report
1190 underflow for exact results. */
1191 if(lost_fraction == lfExactlyZero) {
1192 /* Canonicalize zeroes. */
1199 /* Increment the significand if we're rounding away from zero. */
1200 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1202 exponent = semantics->minExponent;
1204 incrementSignificand();
1205 omsb = significandMSB() + 1;
1207 /* Did the significand increment overflow? */
1208 if(omsb == (unsigned) semantics->precision + 1) {
1209 /* Renormalize by incrementing the exponent and shifting our
1210 significand right one. However if we already have the
1211 maximum exponent we overflow to infinity. */
1212 if(exponent == semantics->maxExponent) {
1213 category = fcInfinity;
1215 return (opStatus) (opOverflow | opInexact);
1218 shiftSignificandRight(1);
1224 /* The normal case - we were and are not denormal, and any
1225 significand increment above didn't overflow. */
1226 if(omsb == semantics->precision)
1229 /* We have a non-zero denormal. */
1230 assert(omsb < semantics->precision);
1232 /* Canonicalize zeroes. */
1236 /* The fcZero case is a denormal that underflowed to zero. */
1237 return (opStatus) (opUnderflow | opInexact);
1241 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1243 switch (convolve(category, rhs.category)) {
1245 llvm_unreachable(0);
1247 case convolve(fcNaN, fcZero):
1248 case convolve(fcNaN, fcNormal):
1249 case convolve(fcNaN, fcInfinity):
1250 case convolve(fcNaN, fcNaN):
1251 case convolve(fcNormal, fcZero):
1252 case convolve(fcInfinity, fcNormal):
1253 case convolve(fcInfinity, fcZero):
1256 case convolve(fcZero, fcNaN):
1257 case convolve(fcNormal, fcNaN):
1258 case convolve(fcInfinity, fcNaN):
1260 copySignificand(rhs);
1263 case convolve(fcNormal, fcInfinity):
1264 case convolve(fcZero, fcInfinity):
1265 category = fcInfinity;
1266 sign = rhs.sign ^ subtract;
1269 case convolve(fcZero, fcNormal):
1271 sign = rhs.sign ^ subtract;
1274 case convolve(fcZero, fcZero):
1275 /* Sign depends on rounding mode; handled by caller. */
1278 case convolve(fcInfinity, fcInfinity):
1279 /* Differently signed infinities can only be validly
1281 if(((sign ^ rhs.sign)!=0) != subtract) {
1288 case convolve(fcNormal, fcNormal):
1293 /* Add or subtract two normal numbers. */
1295 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1298 lostFraction lost_fraction;
1301 /* Determine if the operation on the absolute values is effectively
1302 an addition or subtraction. */
1303 subtract ^= (sign ^ rhs.sign) ? true : false;
1305 /* Are we bigger exponent-wise than the RHS? */
1306 bits = exponent - rhs.exponent;
1308 /* Subtraction is more subtle than one might naively expect. */
1310 APFloat temp_rhs(rhs);
1314 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1315 lost_fraction = lfExactlyZero;
1316 } else if (bits > 0) {
1317 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1318 shiftSignificandLeft(1);
1321 lost_fraction = shiftSignificandRight(-bits - 1);
1322 temp_rhs.shiftSignificandLeft(1);
1327 carry = temp_rhs.subtractSignificand
1328 (*this, lost_fraction != lfExactlyZero);
1329 copySignificand(temp_rhs);
1332 carry = subtractSignificand
1333 (temp_rhs, lost_fraction != lfExactlyZero);
1336 /* Invert the lost fraction - it was on the RHS and
1338 if(lost_fraction == lfLessThanHalf)
1339 lost_fraction = lfMoreThanHalf;
1340 else if(lost_fraction == lfMoreThanHalf)
1341 lost_fraction = lfLessThanHalf;
1343 /* The code above is intended to ensure that no borrow is
1348 APFloat temp_rhs(rhs);
1350 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1351 carry = addSignificand(temp_rhs);
1353 lost_fraction = shiftSignificandRight(-bits);
1354 carry = addSignificand(rhs);
1357 /* We have a guard bit; generating a carry cannot happen. */
1361 return lost_fraction;
1365 APFloat::multiplySpecials(const APFloat &rhs)
1367 switch (convolve(category, rhs.category)) {
1369 llvm_unreachable(0);
1371 case convolve(fcNaN, fcZero):
1372 case convolve(fcNaN, fcNormal):
1373 case convolve(fcNaN, fcInfinity):
1374 case convolve(fcNaN, fcNaN):
1377 case convolve(fcZero, fcNaN):
1378 case convolve(fcNormal, fcNaN):
1379 case convolve(fcInfinity, fcNaN):
1381 copySignificand(rhs);
1384 case convolve(fcNormal, fcInfinity):
1385 case convolve(fcInfinity, fcNormal):
1386 case convolve(fcInfinity, fcInfinity):
1387 category = fcInfinity;
1390 case convolve(fcZero, fcNormal):
1391 case convolve(fcNormal, fcZero):
1392 case convolve(fcZero, fcZero):
1396 case convolve(fcZero, fcInfinity):
1397 case convolve(fcInfinity, fcZero):
1401 case convolve(fcNormal, fcNormal):
1407 APFloat::divideSpecials(const APFloat &rhs)
1409 switch (convolve(category, rhs.category)) {
1411 llvm_unreachable(0);
1413 case convolve(fcNaN, fcZero):
1414 case convolve(fcNaN, fcNormal):
1415 case convolve(fcNaN, fcInfinity):
1416 case convolve(fcNaN, fcNaN):
1417 case convolve(fcInfinity, fcZero):
1418 case convolve(fcInfinity, fcNormal):
1419 case convolve(fcZero, fcInfinity):
1420 case convolve(fcZero, fcNormal):
1423 case convolve(fcZero, fcNaN):
1424 case convolve(fcNormal, fcNaN):
1425 case convolve(fcInfinity, fcNaN):
1427 copySignificand(rhs);
1430 case convolve(fcNormal, fcInfinity):
1434 case convolve(fcNormal, fcZero):
1435 category = fcInfinity;
1438 case convolve(fcInfinity, fcInfinity):
1439 case convolve(fcZero, fcZero):
1443 case convolve(fcNormal, fcNormal):
1449 APFloat::modSpecials(const APFloat &rhs)
1451 switch (convolve(category, rhs.category)) {
1453 llvm_unreachable(0);
1455 case convolve(fcNaN, fcZero):
1456 case convolve(fcNaN, fcNormal):
1457 case convolve(fcNaN, fcInfinity):
1458 case convolve(fcNaN, fcNaN):
1459 case convolve(fcZero, fcInfinity):
1460 case convolve(fcZero, fcNormal):
1461 case convolve(fcNormal, fcInfinity):
1464 case convolve(fcZero, fcNaN):
1465 case convolve(fcNormal, fcNaN):
1466 case convolve(fcInfinity, fcNaN):
1468 copySignificand(rhs);
1471 case convolve(fcNormal, fcZero):
1472 case convolve(fcInfinity, fcZero):
1473 case convolve(fcInfinity, fcNormal):
1474 case convolve(fcInfinity, fcInfinity):
1475 case convolve(fcZero, fcZero):
1479 case convolve(fcNormal, fcNormal):
1486 APFloat::changeSign()
1488 /* Look mummy, this one's easy. */
1493 APFloat::clearSign()
1495 /* So is this one. */
1500 APFloat::copySign(const APFloat &rhs)
1506 /* Normalized addition or subtraction. */
1508 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1513 assertArithmeticOK(*semantics);
1515 fs = addOrSubtractSpecials(rhs, subtract);
1517 /* This return code means it was not a simple case. */
1518 if(fs == opDivByZero) {
1519 lostFraction lost_fraction;
1521 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1522 fs = normalize(rounding_mode, lost_fraction);
1524 /* Can only be zero if we lost no fraction. */
1525 assert(category != fcZero || lost_fraction == lfExactlyZero);
1528 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1529 positive zero unless rounding to minus infinity, except that
1530 adding two like-signed zeroes gives that zero. */
1531 if(category == fcZero) {
1532 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1533 sign = (rounding_mode == rmTowardNegative);
1539 /* Normalized addition. */
1541 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1543 return addOrSubtract(rhs, rounding_mode, false);
1546 /* Normalized subtraction. */
1548 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1550 return addOrSubtract(rhs, rounding_mode, true);
1553 /* Normalized multiply. */
1555 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1559 assertArithmeticOK(*semantics);
1561 fs = multiplySpecials(rhs);
1563 if(category == fcNormal) {
1564 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1565 fs = normalize(rounding_mode, lost_fraction);
1566 if(lost_fraction != lfExactlyZero)
1567 fs = (opStatus) (fs | opInexact);
1573 /* Normalized divide. */
1575 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1579 assertArithmeticOK(*semantics);
1581 fs = divideSpecials(rhs);
1583 if(category == fcNormal) {
1584 lostFraction lost_fraction = divideSignificand(rhs);
1585 fs = normalize(rounding_mode, lost_fraction);
1586 if(lost_fraction != lfExactlyZero)
1587 fs = (opStatus) (fs | opInexact);
1593 /* Normalized remainder. This is not currently correct in all cases. */
1595 APFloat::remainder(const APFloat &rhs)
1599 unsigned int origSign = sign;
1601 assertArithmeticOK(*semantics);
1602 fs = V.divide(rhs, rmNearestTiesToEven);
1603 if (fs == opDivByZero)
1606 int parts = partCount();
1607 integerPart *x = new integerPart[parts];
1609 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1610 rmNearestTiesToEven, &ignored);
1611 if (fs==opInvalidOp)
1614 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1615 rmNearestTiesToEven);
1616 assert(fs==opOK); // should always work
1618 fs = V.multiply(rhs, rmNearestTiesToEven);
1619 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1621 fs = subtract(V, rmNearestTiesToEven);
1622 assert(fs==opOK || fs==opInexact); // likewise
1625 sign = origSign; // IEEE754 requires this
1630 /* Normalized llvm frem (C fmod).
1631 This is not currently correct in all cases. */
1633 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1636 assertArithmeticOK(*semantics);
1637 fs = modSpecials(rhs);
1639 if (category == fcNormal && rhs.category == fcNormal) {
1641 unsigned int origSign = sign;
1643 fs = V.divide(rhs, rmNearestTiesToEven);
1644 if (fs == opDivByZero)
1647 int parts = partCount();
1648 integerPart *x = new integerPart[parts];
1650 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1651 rmTowardZero, &ignored);
1652 if (fs==opInvalidOp)
1655 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1656 rmNearestTiesToEven);
1657 assert(fs==opOK); // should always work
1659 fs = V.multiply(rhs, rounding_mode);
1660 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1662 fs = subtract(V, rounding_mode);
1663 assert(fs==opOK || fs==opInexact); // likewise
1666 sign = origSign; // IEEE754 requires this
1672 /* Normalized fused-multiply-add. */
1674 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1675 const APFloat &addend,
1676 roundingMode rounding_mode)
1680 assertArithmeticOK(*semantics);
1682 /* Post-multiplication sign, before addition. */
1683 sign ^= multiplicand.sign;
1685 /* If and only if all arguments are normal do we need to do an
1686 extended-precision calculation. */
1687 if(category == fcNormal
1688 && multiplicand.category == fcNormal
1689 && addend.category == fcNormal) {
1690 lostFraction lost_fraction;
1692 lost_fraction = multiplySignificand(multiplicand, &addend);
1693 fs = normalize(rounding_mode, lost_fraction);
1694 if(lost_fraction != lfExactlyZero)
1695 fs = (opStatus) (fs | opInexact);
1697 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1698 positive zero unless rounding to minus infinity, except that
1699 adding two like-signed zeroes gives that zero. */
1700 if(category == fcZero && sign != addend.sign)
1701 sign = (rounding_mode == rmTowardNegative);
1703 fs = multiplySpecials(multiplicand);
1705 /* FS can only be opOK or opInvalidOp. There is no more work
1706 to do in the latter case. The IEEE-754R standard says it is
1707 implementation-defined in this case whether, if ADDEND is a
1708 quiet NaN, we raise invalid op; this implementation does so.
1710 If we need to do the addition we can do so with normal
1713 fs = addOrSubtract(addend, rounding_mode, false);
1719 /* Comparison requires normalized numbers. */
1721 APFloat::compare(const APFloat &rhs) const
1725 assertArithmeticOK(*semantics);
1726 assert(semantics == rhs.semantics);
1728 switch (convolve(category, rhs.category)) {
1730 llvm_unreachable(0);
1732 case convolve(fcNaN, fcZero):
1733 case convolve(fcNaN, fcNormal):
1734 case convolve(fcNaN, fcInfinity):
1735 case convolve(fcNaN, fcNaN):
1736 case convolve(fcZero, fcNaN):
1737 case convolve(fcNormal, fcNaN):
1738 case convolve(fcInfinity, fcNaN):
1739 return cmpUnordered;
1741 case convolve(fcInfinity, fcNormal):
1742 case convolve(fcInfinity, fcZero):
1743 case convolve(fcNormal, fcZero):
1747 return cmpGreaterThan;
1749 case convolve(fcNormal, fcInfinity):
1750 case convolve(fcZero, fcInfinity):
1751 case convolve(fcZero, fcNormal):
1753 return cmpGreaterThan;
1757 case convolve(fcInfinity, fcInfinity):
1758 if(sign == rhs.sign)
1763 return cmpGreaterThan;
1765 case convolve(fcZero, fcZero):
1768 case convolve(fcNormal, fcNormal):
1772 /* Two normal numbers. Do they have the same sign? */
1773 if(sign != rhs.sign) {
1775 result = cmpLessThan;
1777 result = cmpGreaterThan;
1779 /* Compare absolute values; invert result if negative. */
1780 result = compareAbsoluteValue(rhs);
1783 if(result == cmpLessThan)
1784 result = cmpGreaterThan;
1785 else if(result == cmpGreaterThan)
1786 result = cmpLessThan;
1793 /// APFloat::convert - convert a value of one floating point type to another.
1794 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1795 /// records whether the transformation lost information, i.e. whether
1796 /// converting the result back to the original type will produce the
1797 /// original value (this is almost the same as return value==fsOK, but there
1798 /// are edge cases where this is not so).
1801 APFloat::convert(const fltSemantics &toSemantics,
1802 roundingMode rounding_mode, bool *losesInfo)
1804 lostFraction lostFraction;
1805 unsigned int newPartCount, oldPartCount;
1808 assertArithmeticOK(*semantics);
1809 assertArithmeticOK(toSemantics);
1810 lostFraction = lfExactlyZero;
1811 newPartCount = partCountForBits(toSemantics.precision + 1);
1812 oldPartCount = partCount();
1814 /* Handle storage complications. If our new form is wider,
1815 re-allocate our bit pattern into wider storage. If it is
1816 narrower, we ignore the excess parts, but if narrowing to a
1817 single part we need to free the old storage.
1818 Be careful not to reference significandParts for zeroes
1819 and infinities, since it aborts. */
1820 if (newPartCount > oldPartCount) {
1821 integerPart *newParts;
1822 newParts = new integerPart[newPartCount];
1823 APInt::tcSet(newParts, 0, newPartCount);
1824 if (category==fcNormal || category==fcNaN)
1825 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1827 significand.parts = newParts;
1828 } else if (newPartCount < oldPartCount) {
1829 /* Capture any lost fraction through truncation of parts so we get
1830 correct rounding whilst normalizing. */
1831 if (category==fcNormal)
1832 lostFraction = lostFractionThroughTruncation
1833 (significandParts(), oldPartCount, toSemantics.precision);
1834 if (newPartCount == 1) {
1835 integerPart newPart = 0;
1836 if (category==fcNormal || category==fcNaN)
1837 newPart = significandParts()[0];
1839 significand.part = newPart;
1843 if(category == fcNormal) {
1844 /* Re-interpret our bit-pattern. */
1845 exponent += toSemantics.precision - semantics->precision;
1846 semantics = &toSemantics;
1847 fs = normalize(rounding_mode, lostFraction);
1848 *losesInfo = (fs != opOK);
1849 } else if (category == fcNaN) {
1850 int shift = toSemantics.precision - semantics->precision;
1851 // Do this now so significandParts gets the right answer
1852 const fltSemantics *oldSemantics = semantics;
1853 semantics = &toSemantics;
1855 // No normalization here, just truncate
1857 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1858 else if (shift < 0) {
1859 unsigned ushift = -shift;
1860 // Figure out if we are losing information. This happens
1861 // if are shifting out something other than 0s, or if the x87 long
1862 // double input did not have its integer bit set (pseudo-NaN), or if the
1863 // x87 long double input did not have its QNan bit set (because the x87
1864 // hardware sets this bit when converting a lower-precision NaN to
1865 // x87 long double).
1866 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1868 if (oldSemantics == &APFloat::x87DoubleExtended &&
1869 (!(*significandParts() & 0x8000000000000000ULL) ||
1870 !(*significandParts() & 0x4000000000000000ULL)))
1872 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1874 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1875 // does not give you back the same bits. This is dubious, and we
1876 // don't currently do it. You're really supposed to get
1877 // an invalid operation signal at runtime, but nobody does that.
1880 semantics = &toSemantics;
1888 /* Convert a floating point number to an integer according to the
1889 rounding mode. If the rounded integer value is out of range this
1890 returns an invalid operation exception and the contents of the
1891 destination parts are unspecified. If the rounded value is in
1892 range but the floating point number is not the exact integer, the C
1893 standard doesn't require an inexact exception to be raised. IEEE
1894 854 does require it so we do that.
1896 Note that for conversions to integer type the C standard requires
1897 round-to-zero to always be used. */
1899 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1901 roundingMode rounding_mode,
1902 bool *isExact) const
1904 lostFraction lost_fraction;
1905 const integerPart *src;
1906 unsigned int dstPartsCount, truncatedBits;
1908 assertArithmeticOK(*semantics);
1912 /* Handle the three special cases first. */
1913 if(category == fcInfinity || category == fcNaN)
1916 dstPartsCount = partCountForBits(width);
1918 if(category == fcZero) {
1919 APInt::tcSet(parts, 0, dstPartsCount);
1920 // Negative zero can't be represented as an int.
1925 src = significandParts();
1927 /* Step 1: place our absolute value, with any fraction truncated, in
1930 /* Our absolute value is less than one; truncate everything. */
1931 APInt::tcSet(parts, 0, dstPartsCount);
1932 /* For exponent -1 the integer bit represents .5, look at that.
1933 For smaller exponents leftmost truncated bit is 0. */
1934 truncatedBits = semantics->precision -1U - exponent;
1936 /* We want the most significant (exponent + 1) bits; the rest are
1938 unsigned int bits = exponent + 1U;
1940 /* Hopelessly large in magnitude? */
1944 if (bits < semantics->precision) {
1945 /* We truncate (semantics->precision - bits) bits. */
1946 truncatedBits = semantics->precision - bits;
1947 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1949 /* We want at least as many bits as are available. */
1950 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1951 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1956 /* Step 2: work out any lost fraction, and increment the absolute
1957 value if we would round away from zero. */
1958 if (truncatedBits) {
1959 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1961 if (lost_fraction != lfExactlyZero
1962 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1963 if (APInt::tcIncrement(parts, dstPartsCount))
1964 return opInvalidOp; /* Overflow. */
1967 lost_fraction = lfExactlyZero;
1970 /* Step 3: check if we fit in the destination. */
1971 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1975 /* Negative numbers cannot be represented as unsigned. */
1979 /* It takes omsb bits to represent the unsigned integer value.
1980 We lose a bit for the sign, but care is needed as the
1981 maximally negative integer is a special case. */
1982 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1985 /* This case can happen because of rounding. */
1990 APInt::tcNegate (parts, dstPartsCount);
1992 if (omsb >= width + !isSigned)
1996 if (lost_fraction == lfExactlyZero) {
2003 /* Same as convertToSignExtendedInteger, except we provide
2004 deterministic values in case of an invalid operation exception,
2005 namely zero for NaNs and the minimal or maximal value respectively
2006 for underflow or overflow.
2007 The *isExact output tells whether the result is exact, in the sense
2008 that converting it back to the original floating point type produces
2009 the original value. This is almost equivalent to result==opOK,
2010 except for negative zeroes.
2013 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2015 roundingMode rounding_mode, bool *isExact) const
2019 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2022 if (fs == opInvalidOp) {
2023 unsigned int bits, dstPartsCount;
2025 dstPartsCount = partCountForBits(width);
2027 if (category == fcNaN)
2032 bits = width - isSigned;
2034 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2035 if (sign && isSigned)
2036 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2042 /* Convert an unsigned integer SRC to a floating point number,
2043 rounding according to ROUNDING_MODE. The sign of the floating
2044 point number is not modified. */
2046 APFloat::convertFromUnsignedParts(const integerPart *src,
2047 unsigned int srcCount,
2048 roundingMode rounding_mode)
2050 unsigned int omsb, precision, dstCount;
2052 lostFraction lost_fraction;
2054 assertArithmeticOK(*semantics);
2055 category = fcNormal;
2056 omsb = APInt::tcMSB(src, srcCount) + 1;
2057 dst = significandParts();
2058 dstCount = partCount();
2059 precision = semantics->precision;
2061 /* We want the most significant PRECISON bits of SRC. There may not
2062 be that many; extract what we can. */
2063 if (precision <= omsb) {
2064 exponent = omsb - 1;
2065 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2067 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2069 exponent = precision - 1;
2070 lost_fraction = lfExactlyZero;
2071 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2074 return normalize(rounding_mode, lost_fraction);
2078 APFloat::convertFromAPInt(const APInt &Val,
2080 roundingMode rounding_mode)
2082 unsigned int partCount = Val.getNumWords();
2086 if (isSigned && api.isNegative()) {
2091 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2094 /* Convert a two's complement integer SRC to a floating point number,
2095 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2096 integer is signed, in which case it must be sign-extended. */
2098 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2099 unsigned int srcCount,
2101 roundingMode rounding_mode)
2105 assertArithmeticOK(*semantics);
2107 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2110 /* If we're signed and negative negate a copy. */
2112 copy = new integerPart[srcCount];
2113 APInt::tcAssign(copy, src, srcCount);
2114 APInt::tcNegate(copy, srcCount);
2115 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2119 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2125 /* FIXME: should this just take a const APInt reference? */
2127 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2128 unsigned int width, bool isSigned,
2129 roundingMode rounding_mode)
2131 unsigned int partCount = partCountForBits(width);
2132 APInt api = APInt(width, partCount, parts);
2135 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2140 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2144 APFloat::convertFromHexadecimalString(const StringRef &s,
2145 roundingMode rounding_mode)
2147 lostFraction lost_fraction = lfExactlyZero;
2148 integerPart *significand;
2149 unsigned int bitPos, partsCount;
2150 StringRef::iterator dot, firstSignificantDigit;
2154 category = fcNormal;
2156 significand = significandParts();
2157 partsCount = partCount();
2158 bitPos = partsCount * integerPartWidth;
2160 /* Skip leading zeroes and any (hexa)decimal point. */
2161 StringRef::iterator begin = s.begin();
2162 StringRef::iterator end = s.end();
2163 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2164 firstSignificantDigit = p;
2167 integerPart hex_value;
2170 assert(dot == end && "String contains multiple dots");
2177 hex_value = hexDigitValue(*p);
2178 if(hex_value == -1U) {
2187 /* Store the number whilst 4-bit nibbles remain. */
2190 hex_value <<= bitPos % integerPartWidth;
2191 significand[bitPos / integerPartWidth] |= hex_value;
2193 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2194 while(p != end && hexDigitValue(*p) != -1U)
2201 /* Hex floats require an exponent but not a hexadecimal point. */
2202 assert(p != end && "Hex strings require an exponent");
2203 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2204 assert(p != begin && "Significand has no digits");
2205 assert((dot == end || p - begin != 1) && "Significand has no digits");
2207 /* Ignore the exponent if we are zero. */
2208 if(p != firstSignificantDigit) {
2211 /* Implicit hexadecimal point? */
2215 /* Calculate the exponent adjustment implicit in the number of
2216 significant digits. */
2217 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2218 if(expAdjustment < 0)
2220 expAdjustment = expAdjustment * 4 - 1;
2222 /* Adjust for writing the significand starting at the most
2223 significant nibble. */
2224 expAdjustment += semantics->precision;
2225 expAdjustment -= partsCount * integerPartWidth;
2227 /* Adjust for the given exponent. */
2228 exponent = totalExponent(p + 1, end, expAdjustment);
2231 return normalize(rounding_mode, lost_fraction);
2235 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2236 unsigned sigPartCount, int exp,
2237 roundingMode rounding_mode)
2239 unsigned int parts, pow5PartCount;
2240 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2241 integerPart pow5Parts[maxPowerOfFiveParts];
2244 isNearest = (rounding_mode == rmNearestTiesToEven
2245 || rounding_mode == rmNearestTiesToAway);
2247 parts = partCountForBits(semantics->precision + 11);
2249 /* Calculate pow(5, abs(exp)). */
2250 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2252 for (;; parts *= 2) {
2253 opStatus sigStatus, powStatus;
2254 unsigned int excessPrecision, truncatedBits;
2256 calcSemantics.precision = parts * integerPartWidth - 1;
2257 excessPrecision = calcSemantics.precision - semantics->precision;
2258 truncatedBits = excessPrecision;
2260 APFloat decSig(calcSemantics, fcZero, sign);
2261 APFloat pow5(calcSemantics, fcZero, false);
2263 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2264 rmNearestTiesToEven);
2265 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2266 rmNearestTiesToEven);
2267 /* Add exp, as 10^n = 5^n * 2^n. */
2268 decSig.exponent += exp;
2270 lostFraction calcLostFraction;
2271 integerPart HUerr, HUdistance;
2272 unsigned int powHUerr;
2275 /* multiplySignificand leaves the precision-th bit set to 1. */
2276 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2277 powHUerr = powStatus != opOK;
2279 calcLostFraction = decSig.divideSignificand(pow5);
2280 /* Denormal numbers have less precision. */
2281 if (decSig.exponent < semantics->minExponent) {
2282 excessPrecision += (semantics->minExponent - decSig.exponent);
2283 truncatedBits = excessPrecision;
2284 if (excessPrecision > calcSemantics.precision)
2285 excessPrecision = calcSemantics.precision;
2287 /* Extra half-ulp lost in reciprocal of exponent. */
2288 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2291 /* Both multiplySignificand and divideSignificand return the
2292 result with the integer bit set. */
2293 assert(APInt::tcExtractBit
2294 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2296 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2298 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2299 excessPrecision, isNearest);
2301 /* Are we guaranteed to round correctly if we truncate? */
2302 if (HUdistance >= HUerr) {
2303 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2304 calcSemantics.precision - excessPrecision,
2306 /* Take the exponent of decSig. If we tcExtract-ed less bits
2307 above we must adjust our exponent to compensate for the
2308 implicit right shift. */
2309 exponent = (decSig.exponent + semantics->precision
2310 - (calcSemantics.precision - excessPrecision));
2311 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2314 return normalize(rounding_mode, calcLostFraction);
2320 APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode)
2325 /* Scan the text. */
2326 StringRef::iterator p = str.begin();
2327 interpretDecimal(p, str.end(), &D);
2329 /* Handle the quick cases. First the case of no significant digits,
2330 i.e. zero, and then exponents that are obviously too large or too
2331 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2332 definitely overflows if
2334 (exp - 1) * L >= maxExponent
2336 and definitely underflows to zero where
2338 (exp + 1) * L <= minExponent - precision
2340 With integer arithmetic the tightest bounds for L are
2342 93/28 < L < 196/59 [ numerator <= 256 ]
2343 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2346 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2350 /* Check whether the normalized exponent is high enough to overflow
2351 max during the log-rebasing in the max-exponent check below. */
2352 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2353 fs = handleOverflow(rounding_mode);
2355 /* If it wasn't, then it also wasn't high enough to overflow max
2356 during the log-rebasing in the min-exponent check. Check that it
2357 won't overflow min in either check, then perform the min-exponent
2359 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2360 (D.normalizedExponent + 1) * 28738 <=
2361 8651 * (semantics->minExponent - (int) semantics->precision)) {
2362 /* Underflow to zero and round. */
2364 fs = normalize(rounding_mode, lfLessThanHalf);
2366 /* We can finally safely perform the max-exponent check. */
2367 } else if ((D.normalizedExponent - 1) * 42039
2368 >= 12655 * semantics->maxExponent) {
2369 /* Overflow and round. */
2370 fs = handleOverflow(rounding_mode);
2372 integerPart *decSignificand;
2373 unsigned int partCount;
2375 /* A tight upper bound on number of bits required to hold an
2376 N-digit decimal integer is N * 196 / 59. Allocate enough space
2377 to hold the full significand, and an extra part required by
2379 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2380 partCount = partCountForBits(1 + 196 * partCount / 59);
2381 decSignificand = new integerPart[partCount + 1];
2384 /* Convert to binary efficiently - we do almost all multiplication
2385 in an integerPart. When this would overflow do we do a single
2386 bignum multiplication, and then revert again to multiplication
2387 in an integerPart. */
2389 integerPart decValue, val, multiplier;
2397 if (p == str.end()) {
2401 decValue = decDigitValue(*p++);
2402 assert(decValue < 10U && "Invalid character in significand");
2404 val = val * 10 + decValue;
2405 /* The maximum number that can be multiplied by ten with any
2406 digit added without overflowing an integerPart. */
2407 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2409 /* Multiply out the current part. */
2410 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2411 partCount, partCount + 1, false);
2413 /* If we used another part (likely but not guaranteed), increase
2415 if (decSignificand[partCount])
2417 } while (p <= D.lastSigDigit);
2419 category = fcNormal;
2420 fs = roundSignificandWithExponent(decSignificand, partCount,
2421 D.exponent, rounding_mode);
2423 delete [] decSignificand;
2430 APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
2432 assertArithmeticOK(*semantics);
2433 assert(!str.empty() && "Invalid string length");
2435 /* Handle a leading minus sign. */
2436 StringRef::iterator p = str.begin();
2437 size_t slen = str.size();
2438 sign = *p == '-' ? 1 : 0;
2439 if(*p == '-' || *p == '+') {
2442 assert(slen && "String has no digits");
2445 if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2446 assert(slen - 2 && "Invalid string");
2447 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2451 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2454 /* Write out a hexadecimal representation of the floating point value
2455 to DST, which must be of sufficient size, in the C99 form
2456 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2457 excluding the terminating NUL.
2459 If UPPERCASE, the output is in upper case, otherwise in lower case.
2461 HEXDIGITS digits appear altogether, rounding the value if
2462 necessary. If HEXDIGITS is 0, the minimal precision to display the
2463 number precisely is used instead. If nothing would appear after
2464 the decimal point it is suppressed.
2466 The decimal exponent is always printed and has at least one digit.
2467 Zero values display an exponent of zero. Infinities and NaNs
2468 appear as "infinity" or "nan" respectively.
2470 The above rules are as specified by C99. There is ambiguity about
2471 what the leading hexadecimal digit should be. This implementation
2472 uses whatever is necessary so that the exponent is displayed as
2473 stored. This implies the exponent will fall within the IEEE format
2474 range, and the leading hexadecimal digit will be 0 (for denormals),
2475 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2476 any other digits zero).
2479 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2480 bool upperCase, roundingMode rounding_mode) const
2484 assertArithmeticOK(*semantics);
2492 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2493 dst += sizeof infinityL - 1;
2497 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2498 dst += sizeof NaNU - 1;
2503 *dst++ = upperCase ? 'X': 'x';
2505 if (hexDigits > 1) {
2507 memset (dst, '0', hexDigits - 1);
2508 dst += hexDigits - 1;
2510 *dst++ = upperCase ? 'P': 'p';
2515 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2521 return static_cast<unsigned int>(dst - p);
2524 /* Does the hard work of outputting the correctly rounded hexadecimal
2525 form of a normal floating point number with the specified number of
2526 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2527 digits necessary to print the value precisely is output. */
2529 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2531 roundingMode rounding_mode) const
2533 unsigned int count, valueBits, shift, partsCount, outputDigits;
2534 const char *hexDigitChars;
2535 const integerPart *significand;
2540 *dst++ = upperCase ? 'X': 'x';
2543 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2545 significand = significandParts();
2546 partsCount = partCount();
2548 /* +3 because the first digit only uses the single integer bit, so
2549 we have 3 virtual zero most-significant-bits. */
2550 valueBits = semantics->precision + 3;
2551 shift = integerPartWidth - valueBits % integerPartWidth;
2553 /* The natural number of digits required ignoring trailing
2554 insignificant zeroes. */
2555 outputDigits = (valueBits - significandLSB () + 3) / 4;
2557 /* hexDigits of zero means use the required number for the
2558 precision. Otherwise, see if we are truncating. If we are,
2559 find out if we need to round away from zero. */
2561 if (hexDigits < outputDigits) {
2562 /* We are dropping non-zero bits, so need to check how to round.
2563 "bits" is the number of dropped bits. */
2565 lostFraction fraction;
2567 bits = valueBits - hexDigits * 4;
2568 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2569 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2571 outputDigits = hexDigits;
2574 /* Write the digits consecutively, and start writing in the location
2575 of the hexadecimal point. We move the most significant digit
2576 left and add the hexadecimal point later. */
2579 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2581 while (outputDigits && count) {
2584 /* Put the most significant integerPartWidth bits in "part". */
2585 if (--count == partsCount)
2586 part = 0; /* An imaginary higher zero part. */
2588 part = significand[count] << shift;
2591 part |= significand[count - 1] >> (integerPartWidth - shift);
2593 /* Convert as much of "part" to hexdigits as we can. */
2594 unsigned int curDigits = integerPartWidth / 4;
2596 if (curDigits > outputDigits)
2597 curDigits = outputDigits;
2598 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2599 outputDigits -= curDigits;
2605 /* Note that hexDigitChars has a trailing '0'. */
2608 *q = hexDigitChars[hexDigitValue (*q) + 1];
2609 } while (*q == '0');
2612 /* Add trailing zeroes. */
2613 memset (dst, '0', outputDigits);
2614 dst += outputDigits;
2617 /* Move the most significant digit to before the point, and if there
2618 is something after the decimal point add it. This must come
2619 after rounding above. */
2626 /* Finally output the exponent. */
2627 *dst++ = upperCase ? 'P': 'p';
2629 return writeSignedDecimal (dst, exponent);
2632 // For good performance it is desirable for different APFloats
2633 // to produce different integers.
2635 APFloat::getHashValue() const
2637 if (category==fcZero) return sign<<8 | semantics->precision ;
2638 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2639 else if (category==fcNaN) return 1<<10 | semantics->precision;
2641 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2642 const integerPart* p = significandParts();
2643 for (int i=partCount(); i>0; i--, p++)
2644 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2649 // Conversion from APFloat to/from host float/double. It may eventually be
2650 // possible to eliminate these and have everybody deal with APFloats, but that
2651 // will take a while. This approach will not easily extend to long double.
2652 // Current implementation requires integerPartWidth==64, which is correct at
2653 // the moment but could be made more general.
2655 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2656 // the actual IEEE respresentations. We compensate for that here.
2659 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2661 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2662 assert(partCount()==2);
2664 uint64_t myexponent, mysignificand;
2666 if (category==fcNormal) {
2667 myexponent = exponent+16383; //bias
2668 mysignificand = significandParts()[0];
2669 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2670 myexponent = 0; // denormal
2671 } else if (category==fcZero) {
2674 } else if (category==fcInfinity) {
2675 myexponent = 0x7fff;
2676 mysignificand = 0x8000000000000000ULL;
2678 assert(category == fcNaN && "Unknown category");
2679 myexponent = 0x7fff;
2680 mysignificand = significandParts()[0];
2684 words[0] = mysignificand;
2685 words[1] = ((uint64_t)(sign & 1) << 15) |
2686 (myexponent & 0x7fffLL);
2687 return APInt(80, 2, words);
2691 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2693 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2694 assert(partCount()==2);
2696 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2698 if (category==fcNormal) {
2699 myexponent = exponent + 1023; //bias
2700 myexponent2 = exponent2 + 1023;
2701 mysignificand = significandParts()[0];
2702 mysignificand2 = significandParts()[1];
2703 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2704 myexponent = 0; // denormal
2705 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2706 myexponent2 = 0; // denormal
2707 } else if (category==fcZero) {
2712 } else if (category==fcInfinity) {
2718 assert(category == fcNaN && "Unknown category");
2720 mysignificand = significandParts()[0];
2721 myexponent2 = exponent2;
2722 mysignificand2 = significandParts()[1];
2726 words[0] = ((uint64_t)(sign & 1) << 63) |
2727 ((myexponent & 0x7ff) << 52) |
2728 (mysignificand & 0xfffffffffffffLL);
2729 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2730 ((myexponent2 & 0x7ff) << 52) |
2731 (mysignificand2 & 0xfffffffffffffLL);
2732 return APInt(128, 2, words);
2736 APFloat::convertQuadrupleAPFloatToAPInt() const
2738 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2739 assert(partCount()==2);
2741 uint64_t myexponent, mysignificand, mysignificand2;
2743 if (category==fcNormal) {
2744 myexponent = exponent+16383; //bias
2745 mysignificand = significandParts()[0];
2746 mysignificand2 = significandParts()[1];
2747 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2748 myexponent = 0; // denormal
2749 } else if (category==fcZero) {
2751 mysignificand = mysignificand2 = 0;
2752 } else if (category==fcInfinity) {
2753 myexponent = 0x7fff;
2754 mysignificand = mysignificand2 = 0;
2756 assert(category == fcNaN && "Unknown category!");
2757 myexponent = 0x7fff;
2758 mysignificand = significandParts()[0];
2759 mysignificand2 = significandParts()[1];
2763 words[0] = mysignificand;
2764 words[1] = ((uint64_t)(sign & 1) << 63) |
2765 ((myexponent & 0x7fff) << 48) |
2766 (mysignificand2 & 0xffffffffffffLL);
2768 return APInt(128, 2, words);
2772 APFloat::convertDoubleAPFloatToAPInt() const
2774 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2775 assert(partCount()==1);
2777 uint64_t myexponent, mysignificand;
2779 if (category==fcNormal) {
2780 myexponent = exponent+1023; //bias
2781 mysignificand = *significandParts();
2782 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2783 myexponent = 0; // denormal
2784 } else if (category==fcZero) {
2787 } else if (category==fcInfinity) {
2791 assert(category == fcNaN && "Unknown category!");
2793 mysignificand = *significandParts();
2796 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2797 ((myexponent & 0x7ff) << 52) |
2798 (mysignificand & 0xfffffffffffffLL))));
2802 APFloat::convertFloatAPFloatToAPInt() const
2804 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2805 assert(partCount()==1);
2807 uint32_t myexponent, mysignificand;
2809 if (category==fcNormal) {
2810 myexponent = exponent+127; //bias
2811 mysignificand = (uint32_t)*significandParts();
2812 if (myexponent == 1 && !(mysignificand & 0x800000))
2813 myexponent = 0; // denormal
2814 } else if (category==fcZero) {
2817 } else if (category==fcInfinity) {
2821 assert(category == fcNaN && "Unknown category!");
2823 mysignificand = (uint32_t)*significandParts();
2826 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2827 (mysignificand & 0x7fffff)));
2831 APFloat::convertHalfAPFloatToAPInt() const
2833 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2834 assert(partCount()==1);
2836 uint32_t myexponent, mysignificand;
2838 if (category==fcNormal) {
2839 myexponent = exponent+15; //bias
2840 mysignificand = (uint32_t)*significandParts();
2841 if (myexponent == 1 && !(mysignificand & 0x400))
2842 myexponent = 0; // denormal
2843 } else if (category==fcZero) {
2846 } else if (category==fcInfinity) {
2850 assert(category == fcNaN && "Unknown category!");
2852 mysignificand = (uint32_t)*significandParts();
2855 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2856 (mysignificand & 0x3ff)));
2859 // This function creates an APInt that is just a bit map of the floating
2860 // point constant as it would appear in memory. It is not a conversion,
2861 // and treating the result as a normal integer is unlikely to be useful.
2864 APFloat::bitcastToAPInt() const
2866 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2867 return convertHalfAPFloatToAPInt();
2869 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2870 return convertFloatAPFloatToAPInt();
2872 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2873 return convertDoubleAPFloatToAPInt();
2875 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2876 return convertQuadrupleAPFloatToAPInt();
2878 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2879 return convertPPCDoubleDoubleAPFloatToAPInt();
2881 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2883 return convertF80LongDoubleAPFloatToAPInt();
2887 APFloat::convertToFloat() const
2889 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2890 "Float semantics are not IEEEsingle");
2891 APInt api = bitcastToAPInt();
2892 return api.bitsToFloat();
2896 APFloat::convertToDouble() const
2898 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2899 "Float semantics are not IEEEdouble");
2900 APInt api = bitcastToAPInt();
2901 return api.bitsToDouble();
2904 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2905 /// does not support these bit patterns:
2906 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2907 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2908 /// exponent = 0, integer bit 1 ("pseudodenormal")
2909 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2910 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2912 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2914 assert(api.getBitWidth()==80);
2915 uint64_t i1 = api.getRawData()[0];
2916 uint64_t i2 = api.getRawData()[1];
2917 uint64_t myexponent = (i2 & 0x7fff);
2918 uint64_t mysignificand = i1;
2920 initialize(&APFloat::x87DoubleExtended);
2921 assert(partCount()==2);
2923 sign = static_cast<unsigned int>(i2>>15);
2924 if (myexponent==0 && mysignificand==0) {
2925 // exponent, significand meaningless
2927 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2928 // exponent, significand meaningless
2929 category = fcInfinity;
2930 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2931 // exponent meaningless
2933 significandParts()[0] = mysignificand;
2934 significandParts()[1] = 0;
2936 category = fcNormal;
2937 exponent = myexponent - 16383;
2938 significandParts()[0] = mysignificand;
2939 significandParts()[1] = 0;
2940 if (myexponent==0) // denormal
2946 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2948 assert(api.getBitWidth()==128);
2949 uint64_t i1 = api.getRawData()[0];
2950 uint64_t i2 = api.getRawData()[1];
2951 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2952 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2953 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2954 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2956 initialize(&APFloat::PPCDoubleDouble);
2957 assert(partCount()==2);
2959 sign = static_cast<unsigned int>(i1>>63);
2960 sign2 = static_cast<unsigned int>(i2>>63);
2961 if (myexponent==0 && mysignificand==0) {
2962 // exponent, significand meaningless
2963 // exponent2 and significand2 are required to be 0; we don't check
2965 } else if (myexponent==0x7ff && mysignificand==0) {
2966 // exponent, significand meaningless
2967 // exponent2 and significand2 are required to be 0; we don't check
2968 category = fcInfinity;
2969 } else if (myexponent==0x7ff && mysignificand!=0) {
2970 // exponent meaningless. So is the whole second word, but keep it
2973 exponent2 = myexponent2;
2974 significandParts()[0] = mysignificand;
2975 significandParts()[1] = mysignificand2;
2977 category = fcNormal;
2978 // Note there is no category2; the second word is treated as if it is
2979 // fcNormal, although it might be something else considered by itself.
2980 exponent = myexponent - 1023;
2981 exponent2 = myexponent2 - 1023;
2982 significandParts()[0] = mysignificand;
2983 significandParts()[1] = mysignificand2;
2984 if (myexponent==0) // denormal
2987 significandParts()[0] |= 0x10000000000000LL; // integer bit
2991 significandParts()[1] |= 0x10000000000000LL; // integer bit
2996 APFloat::initFromQuadrupleAPInt(const APInt &api)
2998 assert(api.getBitWidth()==128);
2999 uint64_t i1 = api.getRawData()[0];
3000 uint64_t i2 = api.getRawData()[1];
3001 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3002 uint64_t mysignificand = i1;
3003 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3005 initialize(&APFloat::IEEEquad);
3006 assert(partCount()==2);
3008 sign = static_cast<unsigned int>(i2>>63);
3009 if (myexponent==0 &&
3010 (mysignificand==0 && mysignificand2==0)) {
3011 // exponent, significand meaningless
3013 } else if (myexponent==0x7fff &&
3014 (mysignificand==0 && mysignificand2==0)) {
3015 // exponent, significand meaningless
3016 category = fcInfinity;
3017 } else if (myexponent==0x7fff &&
3018 (mysignificand!=0 || mysignificand2 !=0)) {
3019 // exponent meaningless
3021 significandParts()[0] = mysignificand;
3022 significandParts()[1] = mysignificand2;
3024 category = fcNormal;
3025 exponent = myexponent - 16383;
3026 significandParts()[0] = mysignificand;
3027 significandParts()[1] = mysignificand2;
3028 if (myexponent==0) // denormal
3031 significandParts()[1] |= 0x1000000000000LL; // integer bit
3036 APFloat::initFromDoubleAPInt(const APInt &api)
3038 assert(api.getBitWidth()==64);
3039 uint64_t i = *api.getRawData();
3040 uint64_t myexponent = (i >> 52) & 0x7ff;
3041 uint64_t mysignificand = i & 0xfffffffffffffLL;
3043 initialize(&APFloat::IEEEdouble);
3044 assert(partCount()==1);
3046 sign = static_cast<unsigned int>(i>>63);
3047 if (myexponent==0 && mysignificand==0) {
3048 // exponent, significand meaningless
3050 } else if (myexponent==0x7ff && mysignificand==0) {
3051 // exponent, significand meaningless
3052 category = fcInfinity;
3053 } else if (myexponent==0x7ff && mysignificand!=0) {
3054 // exponent meaningless
3056 *significandParts() = mysignificand;
3058 category = fcNormal;
3059 exponent = myexponent - 1023;
3060 *significandParts() = mysignificand;
3061 if (myexponent==0) // denormal
3064 *significandParts() |= 0x10000000000000LL; // integer bit
3069 APFloat::initFromFloatAPInt(const APInt & api)
3071 assert(api.getBitWidth()==32);
3072 uint32_t i = (uint32_t)*api.getRawData();
3073 uint32_t myexponent = (i >> 23) & 0xff;
3074 uint32_t mysignificand = i & 0x7fffff;
3076 initialize(&APFloat::IEEEsingle);
3077 assert(partCount()==1);
3080 if (myexponent==0 && mysignificand==0) {
3081 // exponent, significand meaningless
3083 } else if (myexponent==0xff && mysignificand==0) {
3084 // exponent, significand meaningless
3085 category = fcInfinity;
3086 } else if (myexponent==0xff && mysignificand!=0) {
3087 // sign, exponent, significand meaningless
3089 *significandParts() = mysignificand;
3091 category = fcNormal;
3092 exponent = myexponent - 127; //bias
3093 *significandParts() = mysignificand;
3094 if (myexponent==0) // denormal
3097 *significandParts() |= 0x800000; // integer bit
3102 APFloat::initFromHalfAPInt(const APInt & api)
3104 assert(api.getBitWidth()==16);
3105 uint32_t i = (uint32_t)*api.getRawData();
3106 uint32_t myexponent = (i >> 10) & 0x1f;
3107 uint32_t mysignificand = i & 0x3ff;
3109 initialize(&APFloat::IEEEhalf);
3110 assert(partCount()==1);
3113 if (myexponent==0 && mysignificand==0) {
3114 // exponent, significand meaningless
3116 } else if (myexponent==0x1f && mysignificand==0) {
3117 // exponent, significand meaningless
3118 category = fcInfinity;
3119 } else if (myexponent==0x1f && mysignificand!=0) {
3120 // sign, exponent, significand meaningless
3122 *significandParts() = mysignificand;
3124 category = fcNormal;
3125 exponent = myexponent - 15; //bias
3126 *significandParts() = mysignificand;
3127 if (myexponent==0) // denormal
3130 *significandParts() |= 0x400; // integer bit
3134 /// Treat api as containing the bits of a floating point number. Currently
3135 /// we infer the floating point type from the size of the APInt. The
3136 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3137 /// when the size is anything else).
3139 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3141 if (api.getBitWidth() == 16)
3142 return initFromHalfAPInt(api);
3143 else if (api.getBitWidth() == 32)
3144 return initFromFloatAPInt(api);
3145 else if (api.getBitWidth()==64)
3146 return initFromDoubleAPInt(api);
3147 else if (api.getBitWidth()==80)
3148 return initFromF80LongDoubleAPInt(api);
3149 else if (api.getBitWidth()==128)
3151 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3153 llvm_unreachable(0);
3156 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3157 APFloat Val(Sem, fcNormal, Negative);
3159 // We want (in interchange format):
3160 // sign = {Negative}
3162 // significand = 1..1
3164 Val.exponent = Sem.maxExponent; // unbiased
3166 // 1-initialize all bits....
3167 Val.zeroSignificand();
3168 integerPart *significand = Val.significandParts();
3169 unsigned N = partCountForBits(Sem.precision);
3170 for (unsigned i = 0; i != N; ++i)
3171 significand[i] = ~((integerPart) 0);
3173 // ...and then clear the top bits for internal consistency.
3175 &= (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1;
3180 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3181 APFloat Val(Sem, fcNormal, Negative);
3183 // We want (in interchange format):
3184 // sign = {Negative}
3186 // significand = 0..01
3188 Val.exponent = Sem.minExponent; // unbiased
3189 Val.zeroSignificand();
3190 Val.significandParts()[0] = 1;
3194 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3195 APFloat Val(Sem, fcNormal, Negative);
3197 // We want (in interchange format):
3198 // sign = {Negative}
3200 // significand = 10..0
3202 Val.exponent = Sem.minExponent;
3203 Val.zeroSignificand();
3204 Val.significandParts()[partCountForBits(Sem.precision)-1]
3205 |= (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1));
3210 APFloat::APFloat(const APInt& api, bool isIEEE)
3212 initFromAPInt(api, isIEEE);
3215 APFloat::APFloat(float f)
3217 APInt api = APInt(32, 0);
3218 initFromAPInt(api.floatToBits(f));
3221 APFloat::APFloat(double d)
3223 APInt api = APInt(64, 0);
3224 initFromAPInt(api.doubleToBits(d));
3228 static void append(SmallVectorImpl<char> &Buffer,
3229 unsigned N, const char *Str) {
3230 unsigned Start = Buffer.size();
3231 Buffer.set_size(Start + N);
3232 memcpy(&Buffer[Start], Str, N);
3235 template <unsigned N>
3236 void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
3237 append(Buffer, N, Str);
3240 /// Removes data from the given significand until it is no more
3241 /// precise than is required for the desired precision.
3242 void AdjustToPrecision(APInt &significand,
3243 int &exp, unsigned FormatPrecision) {
3244 unsigned bits = significand.getActiveBits();
3246 // 196/59 is a very slight overestimate of lg_2(10).
3247 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3249 if (bits <= bitsRequired) return;
3251 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3252 if (!tensRemovable) return;
3254 exp += tensRemovable;
3256 APInt divisor(significand.getBitWidth(), 1);
3257 APInt powten(significand.getBitWidth(), 10);
3259 if (tensRemovable & 1)
3261 tensRemovable >>= 1;
3262 if (!tensRemovable) break;
3266 significand = significand.udiv(divisor);
3268 // Truncate the significand down to its active bit count, but
3269 // don't try to drop below 32.
3270 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3271 significand.trunc(newPrecision);
3275 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3276 int &exp, unsigned FormatPrecision) {
3277 unsigned N = buffer.size();
3278 if (N <= FormatPrecision) return;
3280 // The most significant figures are the last ones in the buffer.
3281 unsigned FirstSignificant = N - FormatPrecision;
3284 // FIXME: this probably shouldn't use 'round half up'.
3286 // Rounding down is just a truncation, except we also want to drop
3287 // trailing zeros from the new result.
3288 if (buffer[FirstSignificant - 1] < '5') {
3289 while (buffer[FirstSignificant] == '0')
3292 exp += FirstSignificant;
3293 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3297 // Rounding up requires a decimal add-with-carry. If we continue
3298 // the carry, the newly-introduced zeros will just be truncated.
3299 for (unsigned I = FirstSignificant; I != N; ++I) {
3300 if (buffer[I] == '9') {
3308 // If we carried through, we have exactly one digit of precision.
3309 if (FirstSignificant == N) {
3310 exp += FirstSignificant;
3312 buffer.push_back('1');
3316 exp += FirstSignificant;
3317 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3321 void APFloat::toString(SmallVectorImpl<char> &Str,
3322 unsigned FormatPrecision,
3323 unsigned FormatMaxPadding) {
3327 return append(Str, "-Inf");
3329 return append(Str, "+Inf");
3331 case fcNaN: return append(Str, "NaN");
3337 if (!FormatMaxPadding)
3338 append(Str, "0.0E+0");
3350 // Decompose the number into an APInt and an exponent.
3351 int exp = exponent - ((int) semantics->precision - 1);
3352 APInt significand(semantics->precision,
3353 partCountForBits(semantics->precision),
3354 significandParts());
3356 // Set FormatPrecision if zero. We want to do this before we
3357 // truncate trailing zeros, as those are part of the precision.
3358 if (!FormatPrecision) {
3359 // It's an interesting question whether to use the nominal
3360 // precision or the active precision here for denormals.
3362 // FormatPrecision = ceil(significandBits / lg_2(10))
3363 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3366 // Ignore trailing binary zeros.
3367 int trailingZeros = significand.countTrailingZeros();
3368 exp += trailingZeros;
3369 significand = significand.lshr(trailingZeros);
3371 // Change the exponent from 2^e to 10^e.
3374 } else if (exp > 0) {
3376 significand.zext(semantics->precision + exp);
3377 significand <<= exp;
3379 } else { /* exp < 0 */
3382 // We transform this using the identity:
3383 // (N)(2^-e) == (N)(5^e)(10^-e)
3384 // This means we have to multiply N (the significand) by 5^e.
3385 // To avoid overflow, we have to operate on numbers large
3386 // enough to store N * 5^e:
3387 // log2(N * 5^e) == log2(N) + e * log2(5)
3388 // <= semantics->precision + e * 137 / 59
3389 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3391 unsigned precision = semantics->precision + 137 * texp / 59;
3393 // Multiply significand by 5^e.
3394 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3395 significand.zext(precision);
3396 APInt five_to_the_i(precision, 5);
3398 if (texp & 1) significand *= five_to_the_i;
3402 five_to_the_i *= five_to_the_i;
3406 AdjustToPrecision(significand, exp, FormatPrecision);
3408 llvm::SmallVector<char, 256> buffer;
3411 unsigned precision = significand.getBitWidth();
3412 APInt ten(precision, 10);
3413 APInt digit(precision, 0);
3415 bool inTrail = true;
3416 while (significand != 0) {
3417 // digit <- significand % 10
3418 // significand <- significand / 10
3419 APInt::udivrem(significand, ten, significand, digit);
3421 unsigned d = digit.getZExtValue();
3423 // Drop trailing zeros.
3424 if (inTrail && !d) exp++;
3426 buffer.push_back((char) ('0' + d));
3431 assert(!buffer.empty() && "no characters in buffer!");
3433 // Drop down to FormatPrecision.
3434 // TODO: don't do more precise calculations above than are required.
3435 AdjustToPrecision(buffer, exp, FormatPrecision);
3437 unsigned NDigits = buffer.size();
3439 // Check whether we should use scientific notation.
3440 bool FormatScientific;
3441 if (!FormatMaxPadding)
3442 FormatScientific = true;
3447 // But we shouldn't make the number look more precise than it is.
3448 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3449 NDigits + (unsigned) exp > FormatPrecision);
3451 // Power of the most significant digit.
3452 int MSD = exp + (int) (NDigits - 1);
3455 FormatScientific = false;
3457 // 765e-5 == 0.00765
3459 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3464 // Scientific formatting is pretty straightforward.
3465 if (FormatScientific) {
3466 exp += (NDigits - 1);
3468 Str.push_back(buffer[NDigits-1]);
3473 for (unsigned I = 1; I != NDigits; ++I)
3474 Str.push_back(buffer[NDigits-1-I]);
3477 Str.push_back(exp >= 0 ? '+' : '-');
3478 if (exp < 0) exp = -exp;
3479 SmallVector<char, 6> expbuf;
3481 expbuf.push_back((char) ('0' + (exp % 10)));
3484 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3485 Str.push_back(expbuf[E-1-I]);
3489 // Non-scientific, positive exponents.
3491 for (unsigned I = 0; I != NDigits; ++I)
3492 Str.push_back(buffer[NDigits-1-I]);
3493 for (unsigned I = 0; I != (unsigned) exp; ++I)
3498 // Non-scientific, negative exponents.
3500 // The number of digits to the left of the decimal point.
3501 int NWholeDigits = exp + (int) NDigits;
3504 if (NWholeDigits > 0) {
3505 for (; I != (unsigned) NWholeDigits; ++I)
3506 Str.push_back(buffer[NDigits-I-1]);
3509 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3513 for (unsigned Z = 1; Z != NZeros; ++Z)
3517 for (; I != NDigits; ++I)
3518 Str.push_back(buffer[NDigits-I-1]);