1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/MathExtras.h"
25 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
27 /* Assumed in hexadecimal significand parsing, and conversion to
28 hexadecimal strings. */
29 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
30 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
34 /* Represents floating point arithmetic semantics. */
36 /* The largest E such that 2^E is representable; this matches the
37 definition of IEEE 754. */
38 exponent_t maxExponent;
40 /* The smallest E such that 2^E is a normalized number; this
41 matches the definition of IEEE 754. */
42 exponent_t minExponent;
44 /* Number of bits in the significand. This includes the integer
46 unsigned int precision;
48 /* True if arithmetic is supported. */
49 unsigned int arithmeticOK;
52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
57 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
59 // The PowerPC format consists of two doubles. It does not map cleanly
60 // onto the usual format above. For now only storage of constants of
61 // this type is supported, no arithmetic.
62 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
64 /* A tight upper bound on number of parts required to hold the value
67 power * 815 / (351 * integerPartWidth) + 1
69 However, whilst the result may require only this many parts,
70 because we are multiplying two values to get it, the
71 multiplication may require an extra part with the excess part
72 being zero (consider the trivial case of 1 * 1, tcFullMultiply
73 requires two parts to hold the single-part result). So we add an
74 extra one to guarantee enough space whilst multiplying. */
75 const unsigned int maxExponent = 16383;
76 const unsigned int maxPrecision = 113;
77 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
78 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
79 / (351 * integerPartWidth));
82 /* A bunch of private, handy routines. */
84 static inline unsigned int
85 partCountForBits(unsigned int bits)
87 return ((bits) + integerPartWidth - 1) / integerPartWidth;
90 /* Returns 0U-9U. Return values >= 10U are not digits. */
91 static inline unsigned int
92 decDigitValue(unsigned int c)
98 hexDigitValue(unsigned int c)
118 assertArithmeticOK(const llvm::fltSemantics &semantics) {
119 assert(semantics.arithmeticOK
120 && "Compile-time arithmetic does not support these semantics");
123 /* Return the value of a decimal exponent of the form
126 If the exponent overflows, returns a large exponent with the
129 readExponent(StringRef::iterator begin, StringRef::iterator end)
132 unsigned int absExponent;
133 const unsigned int overlargeExponent = 24000; /* FIXME. */
134 StringRef::iterator p = begin;
136 assert(p != end && "Exponent has no digits");
138 isNegative = (*p == '-');
139 if (*p == '-' || *p == '+') {
141 assert(p != end && "Exponent has no digits");
144 absExponent = decDigitValue(*p++);
145 assert(absExponent < 10U && "Invalid character in exponent");
147 for (; p != end; ++p) {
150 value = decDigitValue(*p);
151 assert(value < 10U && "Invalid character in exponent");
153 value += absExponent * 10;
154 if (absExponent >= overlargeExponent) {
155 absExponent = overlargeExponent;
161 assert(p == end && "Invalid exponent in exponent");
164 return -(int) absExponent;
166 return (int) absExponent;
169 /* This is ugly and needs cleaning up, but I don't immediately see
170 how whilst remaining safe. */
172 totalExponent(StringRef::iterator p, StringRef::iterator end,
173 int exponentAdjustment)
175 int unsignedExponent;
176 bool negative, overflow;
179 assert(p != end && "Exponent has no digits");
181 negative = *p == '-';
182 if(*p == '-' || *p == '+') {
184 assert(p != end && "Exponent has no digits");
187 unsignedExponent = 0;
189 for(; p != end; ++p) {
192 value = decDigitValue(*p);
193 assert(value < 10U && "Invalid character in exponent");
195 unsignedExponent = unsignedExponent * 10 + value;
196 if(unsignedExponent > 65535)
200 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
204 exponent = unsignedExponent;
206 exponent = -exponent;
207 exponent += exponentAdjustment;
208 if(exponent > 65535 || exponent < -65536)
213 exponent = negative ? -65536: 65535;
218 static StringRef::iterator
219 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
220 StringRef::iterator *dot)
222 StringRef::iterator p = begin;
224 while(*p == '0' && p != end)
230 assert(end - begin != 1 && "Significand has no digits");
232 while(*p == '0' && p != end)
239 /* Given a normal decimal floating point number of the form
243 where the decimal point and exponent are optional, fill out the
244 structure D. Exponent is appropriate if the significand is
245 treated as an integer, and normalizedExponent if the significand
246 is taken to have the decimal point after a single leading
249 If the value is zero, V->firstSigDigit points to a non-digit, and
250 the return exponent is zero.
253 const char *firstSigDigit;
254 const char *lastSigDigit;
256 int normalizedExponent;
260 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
263 StringRef::iterator dot = end;
264 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
266 D->firstSigDigit = p;
268 D->normalizedExponent = 0;
270 for (; p != end; ++p) {
272 assert(dot == end && "String contains multiple dots");
277 if (decDigitValue(*p) >= 10U)
282 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
283 assert(p != begin && "Significand has no digits");
284 assert((dot == end || p - begin != 1) && "Significand has no digits");
286 /* p points to the first non-digit in the string */
287 D->exponent = readExponent(p + 1, end);
289 /* Implied decimal point? */
294 /* If number is all zeroes accept any exponent. */
295 if (p != D->firstSigDigit) {
296 /* Drop insignificant trailing zeroes. */
301 while (p != begin && *p == '0');
302 while (p != begin && *p == '.');
305 /* Adjust the exponents for any decimal point. */
306 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
307 D->normalizedExponent = (D->exponent +
308 static_cast<exponent_t>((p - D->firstSigDigit)
309 - (dot > D->firstSigDigit && dot < p)));
315 /* Return the trailing fraction of a hexadecimal number.
316 DIGITVALUE is the first hex digit of the fraction, P points to
319 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
320 unsigned int digitValue)
322 unsigned int hexDigit;
324 /* If the first trailing digit isn't 0 or 8 we can work out the
325 fraction immediately. */
327 return lfMoreThanHalf;
328 else if(digitValue < 8 && digitValue > 0)
329 return lfLessThanHalf;
331 /* Otherwise we need to find the first non-zero digit. */
335 assert(p != end && "Invalid trailing hexadecimal fraction!");
337 hexDigit = hexDigitValue(*p);
339 /* If we ran off the end it is exactly zero or one-half, otherwise
342 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
344 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
347 /* Return the fraction lost were a bignum truncated losing the least
348 significant BITS bits. */
350 lostFractionThroughTruncation(const integerPart *parts,
351 unsigned int partCount,
356 lsb = APInt::tcLSB(parts, partCount);
358 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
360 return lfExactlyZero;
362 return lfExactlyHalf;
363 if(bits <= partCount * integerPartWidth
364 && APInt::tcExtractBit(parts, bits - 1))
365 return lfMoreThanHalf;
367 return lfLessThanHalf;
370 /* Shift DST right BITS bits noting lost fraction. */
372 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
374 lostFraction lost_fraction;
376 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
378 APInt::tcShiftRight(dst, parts, bits);
380 return lost_fraction;
383 /* Combine the effect of two lost fractions. */
385 combineLostFractions(lostFraction moreSignificant,
386 lostFraction lessSignificant)
388 if(lessSignificant != lfExactlyZero) {
389 if(moreSignificant == lfExactlyZero)
390 moreSignificant = lfLessThanHalf;
391 else if(moreSignificant == lfExactlyHalf)
392 moreSignificant = lfMoreThanHalf;
395 return moreSignificant;
398 /* The error from the true value, in half-ulps, on multiplying two
399 floating point numbers, which differ from the value they
400 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
401 than the returned value.
403 See "How to Read Floating Point Numbers Accurately" by William D
406 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
408 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
410 if (HUerr1 + HUerr2 == 0)
411 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
413 return inexactMultiply + 2 * (HUerr1 + HUerr2);
416 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
417 when the least significant BITS are truncated. BITS cannot be
420 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
422 unsigned int count, partBits;
423 integerPart part, boundary;
428 count = bits / integerPartWidth;
429 partBits = bits % integerPartWidth + 1;
431 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
434 boundary = (integerPart) 1 << (partBits - 1);
439 if (part - boundary <= boundary - part)
440 return part - boundary;
442 return boundary - part;
445 if (part == boundary) {
448 return ~(integerPart) 0; /* A lot. */
451 } else if (part == boundary - 1) {
454 return ~(integerPart) 0; /* A lot. */
459 return ~(integerPart) 0; /* A lot. */
462 /* Place pow(5, power) in DST, and return the number of parts used.
463 DST must be at least one part larger than size of the answer. */
465 powerOf5(integerPart *dst, unsigned int power)
467 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
469 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
470 pow5s[0] = 78125 * 5;
472 unsigned int partsCount[16] = { 1 };
473 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
475 assert(power <= maxExponent);
480 *p1 = firstEightPowers[power & 7];
486 for (unsigned int n = 0; power; power >>= 1, n++) {
491 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
493 pc = partsCount[n - 1];
494 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
496 if (pow5[pc - 1] == 0)
504 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
506 if (p2[result - 1] == 0)
509 /* Now result is in p1 with partsCount parts and p2 is scratch
511 tmp = p1, p1 = p2, p2 = tmp;
518 APInt::tcAssign(dst, p1, result);
523 /* Zero at the end to avoid modular arithmetic when adding one; used
524 when rounding up during hexadecimal output. */
525 static const char hexDigitsLower[] = "0123456789abcdef0";
526 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
527 static const char infinityL[] = "infinity";
528 static const char infinityU[] = "INFINITY";
529 static const char NaNL[] = "nan";
530 static const char NaNU[] = "NAN";
532 /* Write out an integerPart in hexadecimal, starting with the most
533 significant nibble. Write out exactly COUNT hexdigits, return
536 partAsHex (char *dst, integerPart part, unsigned int count,
537 const char *hexDigitChars)
539 unsigned int result = count;
541 assert(count != 0 && count <= integerPartWidth / 4);
543 part >>= (integerPartWidth - 4 * count);
545 dst[count] = hexDigitChars[part & 0xf];
552 /* Write out an unsigned decimal integer. */
554 writeUnsignedDecimal (char *dst, unsigned int n)
570 /* Write out a signed decimal integer. */
572 writeSignedDecimal (char *dst, int value)
576 dst = writeUnsignedDecimal(dst, -(unsigned) value);
578 dst = writeUnsignedDecimal(dst, value);
585 APFloat::initialize(const fltSemantics *ourSemantics)
589 semantics = ourSemantics;
592 significand.parts = new integerPart[count];
596 APFloat::freeSignificand()
599 delete [] significand.parts;
603 APFloat::assign(const APFloat &rhs)
605 assert(semantics == rhs.semantics);
608 category = rhs.category;
609 exponent = rhs.exponent;
611 exponent2 = rhs.exponent2;
612 if(category == fcNormal || category == fcNaN)
613 copySignificand(rhs);
617 APFloat::copySignificand(const APFloat &rhs)
619 assert(category == fcNormal || category == fcNaN);
620 assert(rhs.partCount() >= partCount());
622 APInt::tcAssign(significandParts(), rhs.significandParts(),
626 /* Make this number a NaN, with an arbitrary but deterministic value
627 for the significand. If double or longer, this is a signalling NaN,
628 which may not be ideal. If float, this is QNaN(0). */
629 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
634 // Set the significand bits to the fill.
635 if (!fill || fill->getNumWords() < partCount())
636 APInt::tcSet(significandParts(), 0, partCount());
638 APInt::tcAssign(significandParts(), fill->getRawData(), partCount());
641 // We always have to clear the QNaN bit to make it an SNaN.
642 APInt::tcClearBit(significandParts(), semantics->precision - 2);
644 // If there are no bits set in the payload, we have to set
645 // *something* to make it a NaN instead of an infinity;
646 // conventionally, this is the next bit down from the QNaN bit.
647 if (APInt::tcIsZero(significandParts(), partCount()))
648 APInt::tcSetBit(significandParts(), semantics->precision - 3);
650 // We always have to set the QNaN bit to make it a QNaN.
651 APInt::tcSetBit(significandParts(), semantics->precision - 2);
655 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
657 APFloat value(Sem, uninitialized);
658 value.makeNaN(SNaN, Negative, fill);
663 APFloat::operator=(const APFloat &rhs)
666 if(semantics != rhs.semantics) {
668 initialize(rhs.semantics);
677 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
680 if (semantics != rhs.semantics ||
681 category != rhs.category ||
684 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
687 if (category==fcZero || category==fcInfinity)
689 else if (category==fcNormal && exponent!=rhs.exponent)
691 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
692 exponent2!=rhs.exponent2)
696 const integerPart* p=significandParts();
697 const integerPart* q=rhs.significandParts();
698 for (; i>0; i--, p++, q++) {
706 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
708 assertArithmeticOK(ourSemantics);
709 initialize(&ourSemantics);
712 exponent = ourSemantics.precision - 1;
713 significandParts()[0] = value;
714 normalize(rmNearestTiesToEven, lfExactlyZero);
717 APFloat::APFloat(const fltSemantics &ourSemantics) {
718 assertArithmeticOK(ourSemantics);
719 initialize(&ourSemantics);
724 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
725 assertArithmeticOK(ourSemantics);
726 // Allocates storage if necessary but does not initialize it.
727 initialize(&ourSemantics);
730 APFloat::APFloat(const fltSemantics &ourSemantics,
731 fltCategory ourCategory, bool negative)
733 assertArithmeticOK(ourSemantics);
734 initialize(&ourSemantics);
735 category = ourCategory;
737 if (category == fcNormal)
739 else if (ourCategory == fcNaN)
743 APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
745 assertArithmeticOK(ourSemantics);
746 initialize(&ourSemantics);
747 convertFromString(text, rmNearestTiesToEven);
750 APFloat::APFloat(const APFloat &rhs)
752 initialize(rhs.semantics);
761 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
762 void APFloat::Profile(FoldingSetNodeID& ID) const {
763 ID.Add(bitcastToAPInt());
767 APFloat::partCount() const
769 return partCountForBits(semantics->precision + 1);
773 APFloat::semanticsPrecision(const fltSemantics &semantics)
775 return semantics.precision;
779 APFloat::significandParts() const
781 return const_cast<APFloat *>(this)->significandParts();
785 APFloat::significandParts()
787 assert(category == fcNormal || category == fcNaN);
790 return significand.parts;
792 return &significand.part;
796 APFloat::zeroSignificand()
799 APInt::tcSet(significandParts(), 0, partCount());
802 /* Increment an fcNormal floating point number's significand. */
804 APFloat::incrementSignificand()
808 carry = APInt::tcIncrement(significandParts(), partCount());
810 /* Our callers should never cause us to overflow. */
814 /* Add the significand of the RHS. Returns the carry flag. */
816 APFloat::addSignificand(const APFloat &rhs)
820 parts = significandParts();
822 assert(semantics == rhs.semantics);
823 assert(exponent == rhs.exponent);
825 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
828 /* Subtract the significand of the RHS with a borrow flag. Returns
831 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
835 parts = significandParts();
837 assert(semantics == rhs.semantics);
838 assert(exponent == rhs.exponent);
840 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
844 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
845 on to the full-precision result of the multiplication. Returns the
848 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
850 unsigned int omsb; // One, not zero, based MSB.
851 unsigned int partsCount, newPartsCount, precision;
852 integerPart *lhsSignificand;
853 integerPart scratch[4];
854 integerPart *fullSignificand;
855 lostFraction lost_fraction;
858 assert(semantics == rhs.semantics);
860 precision = semantics->precision;
861 newPartsCount = partCountForBits(precision * 2);
863 if(newPartsCount > 4)
864 fullSignificand = new integerPart[newPartsCount];
866 fullSignificand = scratch;
868 lhsSignificand = significandParts();
869 partsCount = partCount();
871 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
872 rhs.significandParts(), partsCount, partsCount);
874 lost_fraction = lfExactlyZero;
875 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
876 exponent += rhs.exponent;
879 Significand savedSignificand = significand;
880 const fltSemantics *savedSemantics = semantics;
881 fltSemantics extendedSemantics;
883 unsigned int extendedPrecision;
885 /* Normalize our MSB. */
886 extendedPrecision = precision + precision - 1;
887 if(omsb != extendedPrecision)
889 APInt::tcShiftLeft(fullSignificand, newPartsCount,
890 extendedPrecision - omsb);
891 exponent -= extendedPrecision - omsb;
894 /* Create new semantics. */
895 extendedSemantics = *semantics;
896 extendedSemantics.precision = extendedPrecision;
898 if(newPartsCount == 1)
899 significand.part = fullSignificand[0];
901 significand.parts = fullSignificand;
902 semantics = &extendedSemantics;
904 APFloat extendedAddend(*addend);
905 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
906 assert(status == opOK);
907 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
909 /* Restore our state. */
910 if(newPartsCount == 1)
911 fullSignificand[0] = significand.part;
912 significand = savedSignificand;
913 semantics = savedSemantics;
915 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
918 exponent -= (precision - 1);
920 if(omsb > precision) {
921 unsigned int bits, significantParts;
924 bits = omsb - precision;
925 significantParts = partCountForBits(omsb);
926 lf = shiftRight(fullSignificand, significantParts, bits);
927 lost_fraction = combineLostFractions(lf, lost_fraction);
931 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
933 if(newPartsCount > 4)
934 delete [] fullSignificand;
936 return lost_fraction;
939 /* Multiply the significands of LHS and RHS to DST. */
941 APFloat::divideSignificand(const APFloat &rhs)
943 unsigned int bit, i, partsCount;
944 const integerPart *rhsSignificand;
945 integerPart *lhsSignificand, *dividend, *divisor;
946 integerPart scratch[4];
947 lostFraction lost_fraction;
949 assert(semantics == rhs.semantics);
951 lhsSignificand = significandParts();
952 rhsSignificand = rhs.significandParts();
953 partsCount = partCount();
956 dividend = new integerPart[partsCount * 2];
960 divisor = dividend + partsCount;
962 /* Copy the dividend and divisor as they will be modified in-place. */
963 for(i = 0; i < partsCount; i++) {
964 dividend[i] = lhsSignificand[i];
965 divisor[i] = rhsSignificand[i];
966 lhsSignificand[i] = 0;
969 exponent -= rhs.exponent;
971 unsigned int precision = semantics->precision;
973 /* Normalize the divisor. */
974 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
977 APInt::tcShiftLeft(divisor, partsCount, bit);
980 /* Normalize the dividend. */
981 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
984 APInt::tcShiftLeft(dividend, partsCount, bit);
987 /* Ensure the dividend >= divisor initially for the loop below.
988 Incidentally, this means that the division loop below is
989 guaranteed to set the integer bit to one. */
990 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
992 APInt::tcShiftLeft(dividend, partsCount, 1);
993 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
997 for(bit = precision; bit; bit -= 1) {
998 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
999 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1000 APInt::tcSetBit(lhsSignificand, bit - 1);
1003 APInt::tcShiftLeft(dividend, partsCount, 1);
1006 /* Figure out the lost fraction. */
1007 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1010 lost_fraction = lfMoreThanHalf;
1012 lost_fraction = lfExactlyHalf;
1013 else if(APInt::tcIsZero(dividend, partsCount))
1014 lost_fraction = lfExactlyZero;
1016 lost_fraction = lfLessThanHalf;
1021 return lost_fraction;
1025 APFloat::significandMSB() const
1027 return APInt::tcMSB(significandParts(), partCount());
1031 APFloat::significandLSB() const
1033 return APInt::tcLSB(significandParts(), partCount());
1036 /* Note that a zero result is NOT normalized to fcZero. */
1038 APFloat::shiftSignificandRight(unsigned int bits)
1040 /* Our exponent should not overflow. */
1041 assert((exponent_t) (exponent + bits) >= exponent);
1045 return shiftRight(significandParts(), partCount(), bits);
1048 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1050 APFloat::shiftSignificandLeft(unsigned int bits)
1052 assert(bits < semantics->precision);
1055 unsigned int partsCount = partCount();
1057 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1060 assert(!APInt::tcIsZero(significandParts(), partsCount));
1065 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1069 assert(semantics == rhs.semantics);
1070 assert(category == fcNormal);
1071 assert(rhs.category == fcNormal);
1073 compare = exponent - rhs.exponent;
1075 /* If exponents are equal, do an unsigned bignum comparison of the
1078 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1082 return cmpGreaterThan;
1083 else if(compare < 0)
1089 /* Handle overflow. Sign is preserved. We either become infinity or
1090 the largest finite number. */
1092 APFloat::handleOverflow(roundingMode rounding_mode)
1095 if(rounding_mode == rmNearestTiesToEven
1096 || rounding_mode == rmNearestTiesToAway
1097 || (rounding_mode == rmTowardPositive && !sign)
1098 || (rounding_mode == rmTowardNegative && sign))
1100 category = fcInfinity;
1101 return (opStatus) (opOverflow | opInexact);
1104 /* Otherwise we become the largest finite number. */
1105 category = fcNormal;
1106 exponent = semantics->maxExponent;
1107 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1108 semantics->precision);
1113 /* Returns TRUE if, when truncating the current number, with BIT the
1114 new LSB, with the given lost fraction and rounding mode, the result
1115 would need to be rounded away from zero (i.e., by increasing the
1116 signficand). This routine must work for fcZero of both signs, and
1117 fcNormal numbers. */
1119 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1120 lostFraction lost_fraction,
1121 unsigned int bit) const
1123 /* NaNs and infinities should not have lost fractions. */
1124 assert(category == fcNormal || category == fcZero);
1126 /* Current callers never pass this so we don't handle it. */
1127 assert(lost_fraction != lfExactlyZero);
1129 switch (rounding_mode) {
1131 llvm_unreachable(0);
1133 case rmNearestTiesToAway:
1134 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1136 case rmNearestTiesToEven:
1137 if(lost_fraction == lfMoreThanHalf)
1140 /* Our zeroes don't have a significand to test. */
1141 if(lost_fraction == lfExactlyHalf && category != fcZero)
1142 return APInt::tcExtractBit(significandParts(), bit);
1149 case rmTowardPositive:
1150 return sign == false;
1152 case rmTowardNegative:
1153 return sign == true;
1158 APFloat::normalize(roundingMode rounding_mode,
1159 lostFraction lost_fraction)
1161 unsigned int omsb; /* One, not zero, based MSB. */
1164 if(category != fcNormal)
1167 /* Before rounding normalize the exponent of fcNormal numbers. */
1168 omsb = significandMSB() + 1;
1171 /* OMSB is numbered from 1. We want to place it in the integer
1172 bit numbered PRECISON if possible, with a compensating change in
1174 exponentChange = omsb - semantics->precision;
1176 /* If the resulting exponent is too high, overflow according to
1177 the rounding mode. */
1178 if(exponent + exponentChange > semantics->maxExponent)
1179 return handleOverflow(rounding_mode);
1181 /* Subnormal numbers have exponent minExponent, and their MSB
1182 is forced based on that. */
1183 if(exponent + exponentChange < semantics->minExponent)
1184 exponentChange = semantics->minExponent - exponent;
1186 /* Shifting left is easy as we don't lose precision. */
1187 if(exponentChange < 0) {
1188 assert(lost_fraction == lfExactlyZero);
1190 shiftSignificandLeft(-exponentChange);
1195 if(exponentChange > 0) {
1198 /* Shift right and capture any new lost fraction. */
1199 lf = shiftSignificandRight(exponentChange);
1201 lost_fraction = combineLostFractions(lf, lost_fraction);
1203 /* Keep OMSB up-to-date. */
1204 if(omsb > (unsigned) exponentChange)
1205 omsb -= exponentChange;
1211 /* Now round the number according to rounding_mode given the lost
1214 /* As specified in IEEE 754, since we do not trap we do not report
1215 underflow for exact results. */
1216 if(lost_fraction == lfExactlyZero) {
1217 /* Canonicalize zeroes. */
1224 /* Increment the significand if we're rounding away from zero. */
1225 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1227 exponent = semantics->minExponent;
1229 incrementSignificand();
1230 omsb = significandMSB() + 1;
1232 /* Did the significand increment overflow? */
1233 if(omsb == (unsigned) semantics->precision + 1) {
1234 /* Renormalize by incrementing the exponent and shifting our
1235 significand right one. However if we already have the
1236 maximum exponent we overflow to infinity. */
1237 if(exponent == semantics->maxExponent) {
1238 category = fcInfinity;
1240 return (opStatus) (opOverflow | opInexact);
1243 shiftSignificandRight(1);
1249 /* The normal case - we were and are not denormal, and any
1250 significand increment above didn't overflow. */
1251 if(omsb == semantics->precision)
1254 /* We have a non-zero denormal. */
1255 assert(omsb < semantics->precision);
1257 /* Canonicalize zeroes. */
1261 /* The fcZero case is a denormal that underflowed to zero. */
1262 return (opStatus) (opUnderflow | opInexact);
1266 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1268 switch (convolve(category, rhs.category)) {
1270 llvm_unreachable(0);
1272 case convolve(fcNaN, fcZero):
1273 case convolve(fcNaN, fcNormal):
1274 case convolve(fcNaN, fcInfinity):
1275 case convolve(fcNaN, fcNaN):
1276 case convolve(fcNormal, fcZero):
1277 case convolve(fcInfinity, fcNormal):
1278 case convolve(fcInfinity, fcZero):
1281 case convolve(fcZero, fcNaN):
1282 case convolve(fcNormal, fcNaN):
1283 case convolve(fcInfinity, fcNaN):
1285 copySignificand(rhs);
1288 case convolve(fcNormal, fcInfinity):
1289 case convolve(fcZero, fcInfinity):
1290 category = fcInfinity;
1291 sign = rhs.sign ^ subtract;
1294 case convolve(fcZero, fcNormal):
1296 sign = rhs.sign ^ subtract;
1299 case convolve(fcZero, fcZero):
1300 /* Sign depends on rounding mode; handled by caller. */
1303 case convolve(fcInfinity, fcInfinity):
1304 /* Differently signed infinities can only be validly
1306 if(((sign ^ rhs.sign)!=0) != subtract) {
1313 case convolve(fcNormal, fcNormal):
1318 /* Add or subtract two normal numbers. */
1320 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1323 lostFraction lost_fraction;
1326 /* Determine if the operation on the absolute values is effectively
1327 an addition or subtraction. */
1328 subtract ^= (sign ^ rhs.sign) ? true : false;
1330 /* Are we bigger exponent-wise than the RHS? */
1331 bits = exponent - rhs.exponent;
1333 /* Subtraction is more subtle than one might naively expect. */
1335 APFloat temp_rhs(rhs);
1339 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1340 lost_fraction = lfExactlyZero;
1341 } else if (bits > 0) {
1342 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1343 shiftSignificandLeft(1);
1346 lost_fraction = shiftSignificandRight(-bits - 1);
1347 temp_rhs.shiftSignificandLeft(1);
1352 carry = temp_rhs.subtractSignificand
1353 (*this, lost_fraction != lfExactlyZero);
1354 copySignificand(temp_rhs);
1357 carry = subtractSignificand
1358 (temp_rhs, lost_fraction != lfExactlyZero);
1361 /* Invert the lost fraction - it was on the RHS and
1363 if(lost_fraction == lfLessThanHalf)
1364 lost_fraction = lfMoreThanHalf;
1365 else if(lost_fraction == lfMoreThanHalf)
1366 lost_fraction = lfLessThanHalf;
1368 /* The code above is intended to ensure that no borrow is
1373 APFloat temp_rhs(rhs);
1375 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1376 carry = addSignificand(temp_rhs);
1378 lost_fraction = shiftSignificandRight(-bits);
1379 carry = addSignificand(rhs);
1382 /* We have a guard bit; generating a carry cannot happen. */
1386 return lost_fraction;
1390 APFloat::multiplySpecials(const APFloat &rhs)
1392 switch (convolve(category, rhs.category)) {
1394 llvm_unreachable(0);
1396 case convolve(fcNaN, fcZero):
1397 case convolve(fcNaN, fcNormal):
1398 case convolve(fcNaN, fcInfinity):
1399 case convolve(fcNaN, fcNaN):
1402 case convolve(fcZero, fcNaN):
1403 case convolve(fcNormal, fcNaN):
1404 case convolve(fcInfinity, fcNaN):
1406 copySignificand(rhs);
1409 case convolve(fcNormal, fcInfinity):
1410 case convolve(fcInfinity, fcNormal):
1411 case convolve(fcInfinity, fcInfinity):
1412 category = fcInfinity;
1415 case convolve(fcZero, fcNormal):
1416 case convolve(fcNormal, fcZero):
1417 case convolve(fcZero, fcZero):
1421 case convolve(fcZero, fcInfinity):
1422 case convolve(fcInfinity, fcZero):
1426 case convolve(fcNormal, fcNormal):
1432 APFloat::divideSpecials(const APFloat &rhs)
1434 switch (convolve(category, rhs.category)) {
1436 llvm_unreachable(0);
1438 case convolve(fcNaN, fcZero):
1439 case convolve(fcNaN, fcNormal):
1440 case convolve(fcNaN, fcInfinity):
1441 case convolve(fcNaN, fcNaN):
1442 case convolve(fcInfinity, fcZero):
1443 case convolve(fcInfinity, fcNormal):
1444 case convolve(fcZero, fcInfinity):
1445 case convolve(fcZero, fcNormal):
1448 case convolve(fcZero, fcNaN):
1449 case convolve(fcNormal, fcNaN):
1450 case convolve(fcInfinity, fcNaN):
1452 copySignificand(rhs);
1455 case convolve(fcNormal, fcInfinity):
1459 case convolve(fcNormal, fcZero):
1460 category = fcInfinity;
1463 case convolve(fcInfinity, fcInfinity):
1464 case convolve(fcZero, fcZero):
1468 case convolve(fcNormal, fcNormal):
1474 APFloat::modSpecials(const APFloat &rhs)
1476 switch (convolve(category, rhs.category)) {
1478 llvm_unreachable(0);
1480 case convolve(fcNaN, fcZero):
1481 case convolve(fcNaN, fcNormal):
1482 case convolve(fcNaN, fcInfinity):
1483 case convolve(fcNaN, fcNaN):
1484 case convolve(fcZero, fcInfinity):
1485 case convolve(fcZero, fcNormal):
1486 case convolve(fcNormal, fcInfinity):
1489 case convolve(fcZero, fcNaN):
1490 case convolve(fcNormal, fcNaN):
1491 case convolve(fcInfinity, fcNaN):
1493 copySignificand(rhs);
1496 case convolve(fcNormal, fcZero):
1497 case convolve(fcInfinity, fcZero):
1498 case convolve(fcInfinity, fcNormal):
1499 case convolve(fcInfinity, fcInfinity):
1500 case convolve(fcZero, fcZero):
1504 case convolve(fcNormal, fcNormal):
1511 APFloat::changeSign()
1513 /* Look mummy, this one's easy. */
1518 APFloat::clearSign()
1520 /* So is this one. */
1525 APFloat::copySign(const APFloat &rhs)
1531 /* Normalized addition or subtraction. */
1533 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1538 assertArithmeticOK(*semantics);
1540 fs = addOrSubtractSpecials(rhs, subtract);
1542 /* This return code means it was not a simple case. */
1543 if(fs == opDivByZero) {
1544 lostFraction lost_fraction;
1546 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1547 fs = normalize(rounding_mode, lost_fraction);
1549 /* Can only be zero if we lost no fraction. */
1550 assert(category != fcZero || lost_fraction == lfExactlyZero);
1553 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1554 positive zero unless rounding to minus infinity, except that
1555 adding two like-signed zeroes gives that zero. */
1556 if(category == fcZero) {
1557 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1558 sign = (rounding_mode == rmTowardNegative);
1564 /* Normalized addition. */
1566 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1568 return addOrSubtract(rhs, rounding_mode, false);
1571 /* Normalized subtraction. */
1573 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1575 return addOrSubtract(rhs, rounding_mode, true);
1578 /* Normalized multiply. */
1580 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1584 assertArithmeticOK(*semantics);
1586 fs = multiplySpecials(rhs);
1588 if(category == fcNormal) {
1589 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1590 fs = normalize(rounding_mode, lost_fraction);
1591 if(lost_fraction != lfExactlyZero)
1592 fs = (opStatus) (fs | opInexact);
1598 /* Normalized divide. */
1600 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1604 assertArithmeticOK(*semantics);
1606 fs = divideSpecials(rhs);
1608 if(category == fcNormal) {
1609 lostFraction lost_fraction = divideSignificand(rhs);
1610 fs = normalize(rounding_mode, lost_fraction);
1611 if(lost_fraction != lfExactlyZero)
1612 fs = (opStatus) (fs | opInexact);
1618 /* Normalized remainder. This is not currently correct in all cases. */
1620 APFloat::remainder(const APFloat &rhs)
1624 unsigned int origSign = sign;
1626 assertArithmeticOK(*semantics);
1627 fs = V.divide(rhs, rmNearestTiesToEven);
1628 if (fs == opDivByZero)
1631 int parts = partCount();
1632 integerPart *x = new integerPart[parts];
1634 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1635 rmNearestTiesToEven, &ignored);
1636 if (fs==opInvalidOp)
1639 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1640 rmNearestTiesToEven);
1641 assert(fs==opOK); // should always work
1643 fs = V.multiply(rhs, rmNearestTiesToEven);
1644 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1646 fs = subtract(V, rmNearestTiesToEven);
1647 assert(fs==opOK || fs==opInexact); // likewise
1650 sign = origSign; // IEEE754 requires this
1655 /* Normalized llvm frem (C fmod).
1656 This is not currently correct in all cases. */
1658 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1661 assertArithmeticOK(*semantics);
1662 fs = modSpecials(rhs);
1664 if (category == fcNormal && rhs.category == fcNormal) {
1666 unsigned int origSign = sign;
1668 fs = V.divide(rhs, rmNearestTiesToEven);
1669 if (fs == opDivByZero)
1672 int parts = partCount();
1673 integerPart *x = new integerPart[parts];
1675 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1676 rmTowardZero, &ignored);
1677 if (fs==opInvalidOp)
1680 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1681 rmNearestTiesToEven);
1682 assert(fs==opOK); // should always work
1684 fs = V.multiply(rhs, rounding_mode);
1685 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1687 fs = subtract(V, rounding_mode);
1688 assert(fs==opOK || fs==opInexact); // likewise
1691 sign = origSign; // IEEE754 requires this
1697 /* Normalized fused-multiply-add. */
1699 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1700 const APFloat &addend,
1701 roundingMode rounding_mode)
1705 assertArithmeticOK(*semantics);
1707 /* Post-multiplication sign, before addition. */
1708 sign ^= multiplicand.sign;
1710 /* If and only if all arguments are normal do we need to do an
1711 extended-precision calculation. */
1712 if(category == fcNormal
1713 && multiplicand.category == fcNormal
1714 && addend.category == fcNormal) {
1715 lostFraction lost_fraction;
1717 lost_fraction = multiplySignificand(multiplicand, &addend);
1718 fs = normalize(rounding_mode, lost_fraction);
1719 if(lost_fraction != lfExactlyZero)
1720 fs = (opStatus) (fs | opInexact);
1722 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1723 positive zero unless rounding to minus infinity, except that
1724 adding two like-signed zeroes gives that zero. */
1725 if(category == fcZero && sign != addend.sign)
1726 sign = (rounding_mode == rmTowardNegative);
1728 fs = multiplySpecials(multiplicand);
1730 /* FS can only be opOK or opInvalidOp. There is no more work
1731 to do in the latter case. The IEEE-754R standard says it is
1732 implementation-defined in this case whether, if ADDEND is a
1733 quiet NaN, we raise invalid op; this implementation does so.
1735 If we need to do the addition we can do so with normal
1738 fs = addOrSubtract(addend, rounding_mode, false);
1744 /* Comparison requires normalized numbers. */
1746 APFloat::compare(const APFloat &rhs) const
1750 assertArithmeticOK(*semantics);
1751 assert(semantics == rhs.semantics);
1753 switch (convolve(category, rhs.category)) {
1755 llvm_unreachable(0);
1757 case convolve(fcNaN, fcZero):
1758 case convolve(fcNaN, fcNormal):
1759 case convolve(fcNaN, fcInfinity):
1760 case convolve(fcNaN, fcNaN):
1761 case convolve(fcZero, fcNaN):
1762 case convolve(fcNormal, fcNaN):
1763 case convolve(fcInfinity, fcNaN):
1764 return cmpUnordered;
1766 case convolve(fcInfinity, fcNormal):
1767 case convolve(fcInfinity, fcZero):
1768 case convolve(fcNormal, fcZero):
1772 return cmpGreaterThan;
1774 case convolve(fcNormal, fcInfinity):
1775 case convolve(fcZero, fcInfinity):
1776 case convolve(fcZero, fcNormal):
1778 return cmpGreaterThan;
1782 case convolve(fcInfinity, fcInfinity):
1783 if(sign == rhs.sign)
1788 return cmpGreaterThan;
1790 case convolve(fcZero, fcZero):
1793 case convolve(fcNormal, fcNormal):
1797 /* Two normal numbers. Do they have the same sign? */
1798 if(sign != rhs.sign) {
1800 result = cmpLessThan;
1802 result = cmpGreaterThan;
1804 /* Compare absolute values; invert result if negative. */
1805 result = compareAbsoluteValue(rhs);
1808 if(result == cmpLessThan)
1809 result = cmpGreaterThan;
1810 else if(result == cmpGreaterThan)
1811 result = cmpLessThan;
1818 /// APFloat::convert - convert a value of one floating point type to another.
1819 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1820 /// records whether the transformation lost information, i.e. whether
1821 /// converting the result back to the original type will produce the
1822 /// original value (this is almost the same as return value==fsOK, but there
1823 /// are edge cases where this is not so).
1826 APFloat::convert(const fltSemantics &toSemantics,
1827 roundingMode rounding_mode, bool *losesInfo)
1829 lostFraction lostFraction;
1830 unsigned int newPartCount, oldPartCount;
1833 assertArithmeticOK(*semantics);
1834 assertArithmeticOK(toSemantics);
1835 lostFraction = lfExactlyZero;
1836 newPartCount = partCountForBits(toSemantics.precision + 1);
1837 oldPartCount = partCount();
1839 /* Handle storage complications. If our new form is wider,
1840 re-allocate our bit pattern into wider storage. If it is
1841 narrower, we ignore the excess parts, but if narrowing to a
1842 single part we need to free the old storage.
1843 Be careful not to reference significandParts for zeroes
1844 and infinities, since it aborts. */
1845 if (newPartCount > oldPartCount) {
1846 integerPart *newParts;
1847 newParts = new integerPart[newPartCount];
1848 APInt::tcSet(newParts, 0, newPartCount);
1849 if (category==fcNormal || category==fcNaN)
1850 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1852 significand.parts = newParts;
1853 } else if (newPartCount < oldPartCount) {
1854 /* Capture any lost fraction through truncation of parts so we get
1855 correct rounding whilst normalizing. */
1856 if (category==fcNormal)
1857 lostFraction = lostFractionThroughTruncation
1858 (significandParts(), oldPartCount, toSemantics.precision);
1859 if (newPartCount == 1) {
1860 integerPart newPart = 0;
1861 if (category==fcNormal || category==fcNaN)
1862 newPart = significandParts()[0];
1864 significand.part = newPart;
1868 if(category == fcNormal) {
1869 /* Re-interpret our bit-pattern. */
1870 exponent += toSemantics.precision - semantics->precision;
1871 semantics = &toSemantics;
1872 fs = normalize(rounding_mode, lostFraction);
1873 *losesInfo = (fs != opOK);
1874 } else if (category == fcNaN) {
1875 int shift = toSemantics.precision - semantics->precision;
1876 // Do this now so significandParts gets the right answer
1877 const fltSemantics *oldSemantics = semantics;
1878 semantics = &toSemantics;
1880 // No normalization here, just truncate
1882 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1883 else if (shift < 0) {
1884 unsigned ushift = -shift;
1885 // Figure out if we are losing information. This happens
1886 // if are shifting out something other than 0s, or if the x87 long
1887 // double input did not have its integer bit set (pseudo-NaN), or if the
1888 // x87 long double input did not have its QNan bit set (because the x87
1889 // hardware sets this bit when converting a lower-precision NaN to
1890 // x87 long double).
1891 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1893 if (oldSemantics == &APFloat::x87DoubleExtended &&
1894 (!(*significandParts() & 0x8000000000000000ULL) ||
1895 !(*significandParts() & 0x4000000000000000ULL)))
1897 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1899 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1900 // does not give you back the same bits. This is dubious, and we
1901 // don't currently do it. You're really supposed to get
1902 // an invalid operation signal at runtime, but nobody does that.
1905 semantics = &toSemantics;
1913 /* Convert a floating point number to an integer according to the
1914 rounding mode. If the rounded integer value is out of range this
1915 returns an invalid operation exception and the contents of the
1916 destination parts are unspecified. If the rounded value is in
1917 range but the floating point number is not the exact integer, the C
1918 standard doesn't require an inexact exception to be raised. IEEE
1919 854 does require it so we do that.
1921 Note that for conversions to integer type the C standard requires
1922 round-to-zero to always be used. */
1924 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1926 roundingMode rounding_mode,
1927 bool *isExact) const
1929 lostFraction lost_fraction;
1930 const integerPart *src;
1931 unsigned int dstPartsCount, truncatedBits;
1933 assertArithmeticOK(*semantics);
1937 /* Handle the three special cases first. */
1938 if(category == fcInfinity || category == fcNaN)
1941 dstPartsCount = partCountForBits(width);
1943 if(category == fcZero) {
1944 APInt::tcSet(parts, 0, dstPartsCount);
1945 // Negative zero can't be represented as an int.
1950 src = significandParts();
1952 /* Step 1: place our absolute value, with any fraction truncated, in
1955 /* Our absolute value is less than one; truncate everything. */
1956 APInt::tcSet(parts, 0, dstPartsCount);
1957 /* For exponent -1 the integer bit represents .5, look at that.
1958 For smaller exponents leftmost truncated bit is 0. */
1959 truncatedBits = semantics->precision -1U - exponent;
1961 /* We want the most significant (exponent + 1) bits; the rest are
1963 unsigned int bits = exponent + 1U;
1965 /* Hopelessly large in magnitude? */
1969 if (bits < semantics->precision) {
1970 /* We truncate (semantics->precision - bits) bits. */
1971 truncatedBits = semantics->precision - bits;
1972 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1974 /* We want at least as many bits as are available. */
1975 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1976 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1981 /* Step 2: work out any lost fraction, and increment the absolute
1982 value if we would round away from zero. */
1983 if (truncatedBits) {
1984 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1986 if (lost_fraction != lfExactlyZero
1987 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1988 if (APInt::tcIncrement(parts, dstPartsCount))
1989 return opInvalidOp; /* Overflow. */
1992 lost_fraction = lfExactlyZero;
1995 /* Step 3: check if we fit in the destination. */
1996 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2000 /* Negative numbers cannot be represented as unsigned. */
2004 /* It takes omsb bits to represent the unsigned integer value.
2005 We lose a bit for the sign, but care is needed as the
2006 maximally negative integer is a special case. */
2007 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2010 /* This case can happen because of rounding. */
2015 APInt::tcNegate (parts, dstPartsCount);
2017 if (omsb >= width + !isSigned)
2021 if (lost_fraction == lfExactlyZero) {
2028 /* Same as convertToSignExtendedInteger, except we provide
2029 deterministic values in case of an invalid operation exception,
2030 namely zero for NaNs and the minimal or maximal value respectively
2031 for underflow or overflow.
2032 The *isExact output tells whether the result is exact, in the sense
2033 that converting it back to the original floating point type produces
2034 the original value. This is almost equivalent to result==opOK,
2035 except for negative zeroes.
2038 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2040 roundingMode rounding_mode, bool *isExact) const
2044 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2047 if (fs == opInvalidOp) {
2048 unsigned int bits, dstPartsCount;
2050 dstPartsCount = partCountForBits(width);
2052 if (category == fcNaN)
2057 bits = width - isSigned;
2059 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2060 if (sign && isSigned)
2061 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2067 /* Convert an unsigned integer SRC to a floating point number,
2068 rounding according to ROUNDING_MODE. The sign of the floating
2069 point number is not modified. */
2071 APFloat::convertFromUnsignedParts(const integerPart *src,
2072 unsigned int srcCount,
2073 roundingMode rounding_mode)
2075 unsigned int omsb, precision, dstCount;
2077 lostFraction lost_fraction;
2079 assertArithmeticOK(*semantics);
2080 category = fcNormal;
2081 omsb = APInt::tcMSB(src, srcCount) + 1;
2082 dst = significandParts();
2083 dstCount = partCount();
2084 precision = semantics->precision;
2086 /* We want the most significant PRECISON bits of SRC. There may not
2087 be that many; extract what we can. */
2088 if (precision <= omsb) {
2089 exponent = omsb - 1;
2090 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2092 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2094 exponent = precision - 1;
2095 lost_fraction = lfExactlyZero;
2096 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2099 return normalize(rounding_mode, lost_fraction);
2103 APFloat::convertFromAPInt(const APInt &Val,
2105 roundingMode rounding_mode)
2107 unsigned int partCount = Val.getNumWords();
2111 if (isSigned && api.isNegative()) {
2116 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2119 /* Convert a two's complement integer SRC to a floating point number,
2120 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2121 integer is signed, in which case it must be sign-extended. */
2123 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2124 unsigned int srcCount,
2126 roundingMode rounding_mode)
2130 assertArithmeticOK(*semantics);
2132 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2135 /* If we're signed and negative negate a copy. */
2137 copy = new integerPart[srcCount];
2138 APInt::tcAssign(copy, src, srcCount);
2139 APInt::tcNegate(copy, srcCount);
2140 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2144 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2150 /* FIXME: should this just take a const APInt reference? */
2152 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2153 unsigned int width, bool isSigned,
2154 roundingMode rounding_mode)
2156 unsigned int partCount = partCountForBits(width);
2157 APInt api = APInt(width, partCount, parts);
2160 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2165 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2169 APFloat::convertFromHexadecimalString(const StringRef &s,
2170 roundingMode rounding_mode)
2172 lostFraction lost_fraction = lfExactlyZero;
2173 integerPart *significand;
2174 unsigned int bitPos, partsCount;
2175 StringRef::iterator dot, firstSignificantDigit;
2179 category = fcNormal;
2181 significand = significandParts();
2182 partsCount = partCount();
2183 bitPos = partsCount * integerPartWidth;
2185 /* Skip leading zeroes and any (hexa)decimal point. */
2186 StringRef::iterator begin = s.begin();
2187 StringRef::iterator end = s.end();
2188 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2189 firstSignificantDigit = p;
2192 integerPart hex_value;
2195 assert(dot == end && "String contains multiple dots");
2202 hex_value = hexDigitValue(*p);
2203 if(hex_value == -1U) {
2212 /* Store the number whilst 4-bit nibbles remain. */
2215 hex_value <<= bitPos % integerPartWidth;
2216 significand[bitPos / integerPartWidth] |= hex_value;
2218 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2219 while(p != end && hexDigitValue(*p) != -1U)
2226 /* Hex floats require an exponent but not a hexadecimal point. */
2227 assert(p != end && "Hex strings require an exponent");
2228 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2229 assert(p != begin && "Significand has no digits");
2230 assert((dot == end || p - begin != 1) && "Significand has no digits");
2232 /* Ignore the exponent if we are zero. */
2233 if(p != firstSignificantDigit) {
2236 /* Implicit hexadecimal point? */
2240 /* Calculate the exponent adjustment implicit in the number of
2241 significant digits. */
2242 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2243 if(expAdjustment < 0)
2245 expAdjustment = expAdjustment * 4 - 1;
2247 /* Adjust for writing the significand starting at the most
2248 significant nibble. */
2249 expAdjustment += semantics->precision;
2250 expAdjustment -= partsCount * integerPartWidth;
2252 /* Adjust for the given exponent. */
2253 exponent = totalExponent(p + 1, end, expAdjustment);
2256 return normalize(rounding_mode, lost_fraction);
2260 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2261 unsigned sigPartCount, int exp,
2262 roundingMode rounding_mode)
2264 unsigned int parts, pow5PartCount;
2265 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2266 integerPart pow5Parts[maxPowerOfFiveParts];
2269 isNearest = (rounding_mode == rmNearestTiesToEven
2270 || rounding_mode == rmNearestTiesToAway);
2272 parts = partCountForBits(semantics->precision + 11);
2274 /* Calculate pow(5, abs(exp)). */
2275 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2277 for (;; parts *= 2) {
2278 opStatus sigStatus, powStatus;
2279 unsigned int excessPrecision, truncatedBits;
2281 calcSemantics.precision = parts * integerPartWidth - 1;
2282 excessPrecision = calcSemantics.precision - semantics->precision;
2283 truncatedBits = excessPrecision;
2285 APFloat decSig(calcSemantics, fcZero, sign);
2286 APFloat pow5(calcSemantics, fcZero, false);
2288 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2289 rmNearestTiesToEven);
2290 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2291 rmNearestTiesToEven);
2292 /* Add exp, as 10^n = 5^n * 2^n. */
2293 decSig.exponent += exp;
2295 lostFraction calcLostFraction;
2296 integerPart HUerr, HUdistance;
2297 unsigned int powHUerr;
2300 /* multiplySignificand leaves the precision-th bit set to 1. */
2301 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2302 powHUerr = powStatus != opOK;
2304 calcLostFraction = decSig.divideSignificand(pow5);
2305 /* Denormal numbers have less precision. */
2306 if (decSig.exponent < semantics->minExponent) {
2307 excessPrecision += (semantics->minExponent - decSig.exponent);
2308 truncatedBits = excessPrecision;
2309 if (excessPrecision > calcSemantics.precision)
2310 excessPrecision = calcSemantics.precision;
2312 /* Extra half-ulp lost in reciprocal of exponent. */
2313 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2316 /* Both multiplySignificand and divideSignificand return the
2317 result with the integer bit set. */
2318 assert(APInt::tcExtractBit
2319 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2321 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2323 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2324 excessPrecision, isNearest);
2326 /* Are we guaranteed to round correctly if we truncate? */
2327 if (HUdistance >= HUerr) {
2328 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2329 calcSemantics.precision - excessPrecision,
2331 /* Take the exponent of decSig. If we tcExtract-ed less bits
2332 above we must adjust our exponent to compensate for the
2333 implicit right shift. */
2334 exponent = (decSig.exponent + semantics->precision
2335 - (calcSemantics.precision - excessPrecision));
2336 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2339 return normalize(rounding_mode, calcLostFraction);
2345 APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode)
2350 /* Scan the text. */
2351 StringRef::iterator p = str.begin();
2352 interpretDecimal(p, str.end(), &D);
2354 /* Handle the quick cases. First the case of no significant digits,
2355 i.e. zero, and then exponents that are obviously too large or too
2356 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2357 definitely overflows if
2359 (exp - 1) * L >= maxExponent
2361 and definitely underflows to zero where
2363 (exp + 1) * L <= minExponent - precision
2365 With integer arithmetic the tightest bounds for L are
2367 93/28 < L < 196/59 [ numerator <= 256 ]
2368 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2371 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2375 /* Check whether the normalized exponent is high enough to overflow
2376 max during the log-rebasing in the max-exponent check below. */
2377 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2378 fs = handleOverflow(rounding_mode);
2380 /* If it wasn't, then it also wasn't high enough to overflow max
2381 during the log-rebasing in the min-exponent check. Check that it
2382 won't overflow min in either check, then perform the min-exponent
2384 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2385 (D.normalizedExponent + 1) * 28738 <=
2386 8651 * (semantics->minExponent - (int) semantics->precision)) {
2387 /* Underflow to zero and round. */
2389 fs = normalize(rounding_mode, lfLessThanHalf);
2391 /* We can finally safely perform the max-exponent check. */
2392 } else if ((D.normalizedExponent - 1) * 42039
2393 >= 12655 * semantics->maxExponent) {
2394 /* Overflow and round. */
2395 fs = handleOverflow(rounding_mode);
2397 integerPart *decSignificand;
2398 unsigned int partCount;
2400 /* A tight upper bound on number of bits required to hold an
2401 N-digit decimal integer is N * 196 / 59. Allocate enough space
2402 to hold the full significand, and an extra part required by
2404 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2405 partCount = partCountForBits(1 + 196 * partCount / 59);
2406 decSignificand = new integerPart[partCount + 1];
2409 /* Convert to binary efficiently - we do almost all multiplication
2410 in an integerPart. When this would overflow do we do a single
2411 bignum multiplication, and then revert again to multiplication
2412 in an integerPart. */
2414 integerPart decValue, val, multiplier;
2422 if (p == str.end()) {
2426 decValue = decDigitValue(*p++);
2427 assert(decValue < 10U && "Invalid character in significand");
2429 val = val * 10 + decValue;
2430 /* The maximum number that can be multiplied by ten with any
2431 digit added without overflowing an integerPart. */
2432 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2434 /* Multiply out the current part. */
2435 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2436 partCount, partCount + 1, false);
2438 /* If we used another part (likely but not guaranteed), increase
2440 if (decSignificand[partCount])
2442 } while (p <= D.lastSigDigit);
2444 category = fcNormal;
2445 fs = roundSignificandWithExponent(decSignificand, partCount,
2446 D.exponent, rounding_mode);
2448 delete [] decSignificand;
2455 APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
2457 assertArithmeticOK(*semantics);
2458 assert(!str.empty() && "Invalid string length");
2460 /* Handle a leading minus sign. */
2461 StringRef::iterator p = str.begin();
2462 size_t slen = str.size();
2463 sign = *p == '-' ? 1 : 0;
2464 if(*p == '-' || *p == '+') {
2467 assert(slen && "String has no digits");
2470 if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2471 assert(slen - 2 && "Invalid string");
2472 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2476 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2479 /* Write out a hexadecimal representation of the floating point value
2480 to DST, which must be of sufficient size, in the C99 form
2481 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2482 excluding the terminating NUL.
2484 If UPPERCASE, the output is in upper case, otherwise in lower case.
2486 HEXDIGITS digits appear altogether, rounding the value if
2487 necessary. If HEXDIGITS is 0, the minimal precision to display the
2488 number precisely is used instead. If nothing would appear after
2489 the decimal point it is suppressed.
2491 The decimal exponent is always printed and has at least one digit.
2492 Zero values display an exponent of zero. Infinities and NaNs
2493 appear as "infinity" or "nan" respectively.
2495 The above rules are as specified by C99. There is ambiguity about
2496 what the leading hexadecimal digit should be. This implementation
2497 uses whatever is necessary so that the exponent is displayed as
2498 stored. This implies the exponent will fall within the IEEE format
2499 range, and the leading hexadecimal digit will be 0 (for denormals),
2500 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2501 any other digits zero).
2504 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2505 bool upperCase, roundingMode rounding_mode) const
2509 assertArithmeticOK(*semantics);
2517 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2518 dst += sizeof infinityL - 1;
2522 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2523 dst += sizeof NaNU - 1;
2528 *dst++ = upperCase ? 'X': 'x';
2530 if (hexDigits > 1) {
2532 memset (dst, '0', hexDigits - 1);
2533 dst += hexDigits - 1;
2535 *dst++ = upperCase ? 'P': 'p';
2540 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2546 return static_cast<unsigned int>(dst - p);
2549 /* Does the hard work of outputting the correctly rounded hexadecimal
2550 form of a normal floating point number with the specified number of
2551 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2552 digits necessary to print the value precisely is output. */
2554 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2556 roundingMode rounding_mode) const
2558 unsigned int count, valueBits, shift, partsCount, outputDigits;
2559 const char *hexDigitChars;
2560 const integerPart *significand;
2565 *dst++ = upperCase ? 'X': 'x';
2568 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2570 significand = significandParts();
2571 partsCount = partCount();
2573 /* +3 because the first digit only uses the single integer bit, so
2574 we have 3 virtual zero most-significant-bits. */
2575 valueBits = semantics->precision + 3;
2576 shift = integerPartWidth - valueBits % integerPartWidth;
2578 /* The natural number of digits required ignoring trailing
2579 insignificant zeroes. */
2580 outputDigits = (valueBits - significandLSB () + 3) / 4;
2582 /* hexDigits of zero means use the required number for the
2583 precision. Otherwise, see if we are truncating. If we are,
2584 find out if we need to round away from zero. */
2586 if (hexDigits < outputDigits) {
2587 /* We are dropping non-zero bits, so need to check how to round.
2588 "bits" is the number of dropped bits. */
2590 lostFraction fraction;
2592 bits = valueBits - hexDigits * 4;
2593 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2594 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2596 outputDigits = hexDigits;
2599 /* Write the digits consecutively, and start writing in the location
2600 of the hexadecimal point. We move the most significant digit
2601 left and add the hexadecimal point later. */
2604 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2606 while (outputDigits && count) {
2609 /* Put the most significant integerPartWidth bits in "part". */
2610 if (--count == partsCount)
2611 part = 0; /* An imaginary higher zero part. */
2613 part = significand[count] << shift;
2616 part |= significand[count - 1] >> (integerPartWidth - shift);
2618 /* Convert as much of "part" to hexdigits as we can. */
2619 unsigned int curDigits = integerPartWidth / 4;
2621 if (curDigits > outputDigits)
2622 curDigits = outputDigits;
2623 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2624 outputDigits -= curDigits;
2630 /* Note that hexDigitChars has a trailing '0'. */
2633 *q = hexDigitChars[hexDigitValue (*q) + 1];
2634 } while (*q == '0');
2637 /* Add trailing zeroes. */
2638 memset (dst, '0', outputDigits);
2639 dst += outputDigits;
2642 /* Move the most significant digit to before the point, and if there
2643 is something after the decimal point add it. This must come
2644 after rounding above. */
2651 /* Finally output the exponent. */
2652 *dst++ = upperCase ? 'P': 'p';
2654 return writeSignedDecimal (dst, exponent);
2657 // For good performance it is desirable for different APFloats
2658 // to produce different integers.
2660 APFloat::getHashValue() const
2662 if (category==fcZero) return sign<<8 | semantics->precision ;
2663 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2664 else if (category==fcNaN) return 1<<10 | semantics->precision;
2666 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2667 const integerPart* p = significandParts();
2668 for (int i=partCount(); i>0; i--, p++)
2669 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2674 // Conversion from APFloat to/from host float/double. It may eventually be
2675 // possible to eliminate these and have everybody deal with APFloats, but that
2676 // will take a while. This approach will not easily extend to long double.
2677 // Current implementation requires integerPartWidth==64, which is correct at
2678 // the moment but could be made more general.
2680 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2681 // the actual IEEE respresentations. We compensate for that here.
2684 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2686 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2687 assert(partCount()==2);
2689 uint64_t myexponent, mysignificand;
2691 if (category==fcNormal) {
2692 myexponent = exponent+16383; //bias
2693 mysignificand = significandParts()[0];
2694 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2695 myexponent = 0; // denormal
2696 } else if (category==fcZero) {
2699 } else if (category==fcInfinity) {
2700 myexponent = 0x7fff;
2701 mysignificand = 0x8000000000000000ULL;
2703 assert(category == fcNaN && "Unknown category");
2704 myexponent = 0x7fff;
2705 mysignificand = significandParts()[0];
2709 words[0] = mysignificand;
2710 words[1] = ((uint64_t)(sign & 1) << 15) |
2711 (myexponent & 0x7fffLL);
2712 return APInt(80, 2, words);
2716 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2718 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2719 assert(partCount()==2);
2721 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2723 if (category==fcNormal) {
2724 myexponent = exponent + 1023; //bias
2725 myexponent2 = exponent2 + 1023;
2726 mysignificand = significandParts()[0];
2727 mysignificand2 = significandParts()[1];
2728 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2729 myexponent = 0; // denormal
2730 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2731 myexponent2 = 0; // denormal
2732 } else if (category==fcZero) {
2737 } else if (category==fcInfinity) {
2743 assert(category == fcNaN && "Unknown category");
2745 mysignificand = significandParts()[0];
2746 myexponent2 = exponent2;
2747 mysignificand2 = significandParts()[1];
2751 words[0] = ((uint64_t)(sign & 1) << 63) |
2752 ((myexponent & 0x7ff) << 52) |
2753 (mysignificand & 0xfffffffffffffLL);
2754 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2755 ((myexponent2 & 0x7ff) << 52) |
2756 (mysignificand2 & 0xfffffffffffffLL);
2757 return APInt(128, 2, words);
2761 APFloat::convertQuadrupleAPFloatToAPInt() const
2763 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2764 assert(partCount()==2);
2766 uint64_t myexponent, mysignificand, mysignificand2;
2768 if (category==fcNormal) {
2769 myexponent = exponent+16383; //bias
2770 mysignificand = significandParts()[0];
2771 mysignificand2 = significandParts()[1];
2772 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2773 myexponent = 0; // denormal
2774 } else if (category==fcZero) {
2776 mysignificand = mysignificand2 = 0;
2777 } else if (category==fcInfinity) {
2778 myexponent = 0x7fff;
2779 mysignificand = mysignificand2 = 0;
2781 assert(category == fcNaN && "Unknown category!");
2782 myexponent = 0x7fff;
2783 mysignificand = significandParts()[0];
2784 mysignificand2 = significandParts()[1];
2788 words[0] = mysignificand;
2789 words[1] = ((uint64_t)(sign & 1) << 63) |
2790 ((myexponent & 0x7fff) << 48) |
2791 (mysignificand2 & 0xffffffffffffLL);
2793 return APInt(128, 2, words);
2797 APFloat::convertDoubleAPFloatToAPInt() const
2799 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2800 assert(partCount()==1);
2802 uint64_t myexponent, mysignificand;
2804 if (category==fcNormal) {
2805 myexponent = exponent+1023; //bias
2806 mysignificand = *significandParts();
2807 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2808 myexponent = 0; // denormal
2809 } else if (category==fcZero) {
2812 } else if (category==fcInfinity) {
2816 assert(category == fcNaN && "Unknown category!");
2818 mysignificand = *significandParts();
2821 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2822 ((myexponent & 0x7ff) << 52) |
2823 (mysignificand & 0xfffffffffffffLL))));
2827 APFloat::convertFloatAPFloatToAPInt() const
2829 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2830 assert(partCount()==1);
2832 uint32_t myexponent, mysignificand;
2834 if (category==fcNormal) {
2835 myexponent = exponent+127; //bias
2836 mysignificand = (uint32_t)*significandParts();
2837 if (myexponent == 1 && !(mysignificand & 0x800000))
2838 myexponent = 0; // denormal
2839 } else if (category==fcZero) {
2842 } else if (category==fcInfinity) {
2846 assert(category == fcNaN && "Unknown category!");
2848 mysignificand = (uint32_t)*significandParts();
2851 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2852 (mysignificand & 0x7fffff)));
2856 APFloat::convertHalfAPFloatToAPInt() const
2858 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2859 assert(partCount()==1);
2861 uint32_t myexponent, mysignificand;
2863 if (category==fcNormal) {
2864 myexponent = exponent+15; //bias
2865 mysignificand = (uint32_t)*significandParts();
2866 if (myexponent == 1 && !(mysignificand & 0x400))
2867 myexponent = 0; // denormal
2868 } else if (category==fcZero) {
2871 } else if (category==fcInfinity) {
2875 assert(category == fcNaN && "Unknown category!");
2877 mysignificand = (uint32_t)*significandParts();
2880 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2881 (mysignificand & 0x3ff)));
2884 // This function creates an APInt that is just a bit map of the floating
2885 // point constant as it would appear in memory. It is not a conversion,
2886 // and treating the result as a normal integer is unlikely to be useful.
2889 APFloat::bitcastToAPInt() const
2891 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2892 return convertHalfAPFloatToAPInt();
2894 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2895 return convertFloatAPFloatToAPInt();
2897 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2898 return convertDoubleAPFloatToAPInt();
2900 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2901 return convertQuadrupleAPFloatToAPInt();
2903 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2904 return convertPPCDoubleDoubleAPFloatToAPInt();
2906 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2908 return convertF80LongDoubleAPFloatToAPInt();
2912 APFloat::convertToFloat() const
2914 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2915 "Float semantics are not IEEEsingle");
2916 APInt api = bitcastToAPInt();
2917 return api.bitsToFloat();
2921 APFloat::convertToDouble() const
2923 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2924 "Float semantics are not IEEEdouble");
2925 APInt api = bitcastToAPInt();
2926 return api.bitsToDouble();
2929 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2930 /// does not support these bit patterns:
2931 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2932 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2933 /// exponent = 0, integer bit 1 ("pseudodenormal")
2934 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2935 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2937 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2939 assert(api.getBitWidth()==80);
2940 uint64_t i1 = api.getRawData()[0];
2941 uint64_t i2 = api.getRawData()[1];
2942 uint64_t myexponent = (i2 & 0x7fff);
2943 uint64_t mysignificand = i1;
2945 initialize(&APFloat::x87DoubleExtended);
2946 assert(partCount()==2);
2948 sign = static_cast<unsigned int>(i2>>15);
2949 if (myexponent==0 && mysignificand==0) {
2950 // exponent, significand meaningless
2952 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2953 // exponent, significand meaningless
2954 category = fcInfinity;
2955 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2956 // exponent meaningless
2958 significandParts()[0] = mysignificand;
2959 significandParts()[1] = 0;
2961 category = fcNormal;
2962 exponent = myexponent - 16383;
2963 significandParts()[0] = mysignificand;
2964 significandParts()[1] = 0;
2965 if (myexponent==0) // denormal
2971 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2973 assert(api.getBitWidth()==128);
2974 uint64_t i1 = api.getRawData()[0];
2975 uint64_t i2 = api.getRawData()[1];
2976 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2977 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2978 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2979 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2981 initialize(&APFloat::PPCDoubleDouble);
2982 assert(partCount()==2);
2984 sign = static_cast<unsigned int>(i1>>63);
2985 sign2 = static_cast<unsigned int>(i2>>63);
2986 if (myexponent==0 && mysignificand==0) {
2987 // exponent, significand meaningless
2988 // exponent2 and significand2 are required to be 0; we don't check
2990 } else if (myexponent==0x7ff && mysignificand==0) {
2991 // exponent, significand meaningless
2992 // exponent2 and significand2 are required to be 0; we don't check
2993 category = fcInfinity;
2994 } else if (myexponent==0x7ff && mysignificand!=0) {
2995 // exponent meaningless. So is the whole second word, but keep it
2998 exponent2 = myexponent2;
2999 significandParts()[0] = mysignificand;
3000 significandParts()[1] = mysignificand2;
3002 category = fcNormal;
3003 // Note there is no category2; the second word is treated as if it is
3004 // fcNormal, although it might be something else considered by itself.
3005 exponent = myexponent - 1023;
3006 exponent2 = myexponent2 - 1023;
3007 significandParts()[0] = mysignificand;
3008 significandParts()[1] = mysignificand2;
3009 if (myexponent==0) // denormal
3012 significandParts()[0] |= 0x10000000000000LL; // integer bit
3016 significandParts()[1] |= 0x10000000000000LL; // integer bit
3021 APFloat::initFromQuadrupleAPInt(const APInt &api)
3023 assert(api.getBitWidth()==128);
3024 uint64_t i1 = api.getRawData()[0];
3025 uint64_t i2 = api.getRawData()[1];
3026 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3027 uint64_t mysignificand = i1;
3028 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3030 initialize(&APFloat::IEEEquad);
3031 assert(partCount()==2);
3033 sign = static_cast<unsigned int>(i2>>63);
3034 if (myexponent==0 &&
3035 (mysignificand==0 && mysignificand2==0)) {
3036 // exponent, significand meaningless
3038 } else if (myexponent==0x7fff &&
3039 (mysignificand==0 && mysignificand2==0)) {
3040 // exponent, significand meaningless
3041 category = fcInfinity;
3042 } else if (myexponent==0x7fff &&
3043 (mysignificand!=0 || mysignificand2 !=0)) {
3044 // exponent meaningless
3046 significandParts()[0] = mysignificand;
3047 significandParts()[1] = mysignificand2;
3049 category = fcNormal;
3050 exponent = myexponent - 16383;
3051 significandParts()[0] = mysignificand;
3052 significandParts()[1] = mysignificand2;
3053 if (myexponent==0) // denormal
3056 significandParts()[1] |= 0x1000000000000LL; // integer bit
3061 APFloat::initFromDoubleAPInt(const APInt &api)
3063 assert(api.getBitWidth()==64);
3064 uint64_t i = *api.getRawData();
3065 uint64_t myexponent = (i >> 52) & 0x7ff;
3066 uint64_t mysignificand = i & 0xfffffffffffffLL;
3068 initialize(&APFloat::IEEEdouble);
3069 assert(partCount()==1);
3071 sign = static_cast<unsigned int>(i>>63);
3072 if (myexponent==0 && mysignificand==0) {
3073 // exponent, significand meaningless
3075 } else if (myexponent==0x7ff && mysignificand==0) {
3076 // exponent, significand meaningless
3077 category = fcInfinity;
3078 } else if (myexponent==0x7ff && mysignificand!=0) {
3079 // exponent meaningless
3081 *significandParts() = mysignificand;
3083 category = fcNormal;
3084 exponent = myexponent - 1023;
3085 *significandParts() = mysignificand;
3086 if (myexponent==0) // denormal
3089 *significandParts() |= 0x10000000000000LL; // integer bit
3094 APFloat::initFromFloatAPInt(const APInt & api)
3096 assert(api.getBitWidth()==32);
3097 uint32_t i = (uint32_t)*api.getRawData();
3098 uint32_t myexponent = (i >> 23) & 0xff;
3099 uint32_t mysignificand = i & 0x7fffff;
3101 initialize(&APFloat::IEEEsingle);
3102 assert(partCount()==1);
3105 if (myexponent==0 && mysignificand==0) {
3106 // exponent, significand meaningless
3108 } else if (myexponent==0xff && mysignificand==0) {
3109 // exponent, significand meaningless
3110 category = fcInfinity;
3111 } else if (myexponent==0xff && mysignificand!=0) {
3112 // sign, exponent, significand meaningless
3114 *significandParts() = mysignificand;
3116 category = fcNormal;
3117 exponent = myexponent - 127; //bias
3118 *significandParts() = mysignificand;
3119 if (myexponent==0) // denormal
3122 *significandParts() |= 0x800000; // integer bit
3127 APFloat::initFromHalfAPInt(const APInt & api)
3129 assert(api.getBitWidth()==16);
3130 uint32_t i = (uint32_t)*api.getRawData();
3131 uint32_t myexponent = (i >> 10) & 0x1f;
3132 uint32_t mysignificand = i & 0x3ff;
3134 initialize(&APFloat::IEEEhalf);
3135 assert(partCount()==1);
3138 if (myexponent==0 && mysignificand==0) {
3139 // exponent, significand meaningless
3141 } else if (myexponent==0x1f && mysignificand==0) {
3142 // exponent, significand meaningless
3143 category = fcInfinity;
3144 } else if (myexponent==0x1f && mysignificand!=0) {
3145 // sign, exponent, significand meaningless
3147 *significandParts() = mysignificand;
3149 category = fcNormal;
3150 exponent = myexponent - 15; //bias
3151 *significandParts() = mysignificand;
3152 if (myexponent==0) // denormal
3155 *significandParts() |= 0x400; // integer bit
3159 /// Treat api as containing the bits of a floating point number. Currently
3160 /// we infer the floating point type from the size of the APInt. The
3161 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3162 /// when the size is anything else).
3164 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3166 if (api.getBitWidth() == 16)
3167 return initFromHalfAPInt(api);
3168 else if (api.getBitWidth() == 32)
3169 return initFromFloatAPInt(api);
3170 else if (api.getBitWidth()==64)
3171 return initFromDoubleAPInt(api);
3172 else if (api.getBitWidth()==80)
3173 return initFromF80LongDoubleAPInt(api);
3174 else if (api.getBitWidth()==128)
3176 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3178 llvm_unreachable(0);
3181 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3182 APFloat Val(Sem, fcNormal, Negative);
3184 // We want (in interchange format):
3185 // sign = {Negative}
3187 // significand = 1..1
3189 Val.exponent = Sem.maxExponent; // unbiased
3191 // 1-initialize all bits....
3192 Val.zeroSignificand();
3193 integerPart *significand = Val.significandParts();
3194 unsigned N = partCountForBits(Sem.precision);
3195 for (unsigned i = 0; i != N; ++i)
3196 significand[i] = ~((integerPart) 0);
3198 // ...and then clear the top bits for internal consistency.
3200 &= (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1;
3205 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3206 APFloat Val(Sem, fcNormal, Negative);
3208 // We want (in interchange format):
3209 // sign = {Negative}
3211 // significand = 0..01
3213 Val.exponent = Sem.minExponent; // unbiased
3214 Val.zeroSignificand();
3215 Val.significandParts()[0] = 1;
3219 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3220 APFloat Val(Sem, fcNormal, Negative);
3222 // We want (in interchange format):
3223 // sign = {Negative}
3225 // significand = 10..0
3227 Val.exponent = Sem.minExponent;
3228 Val.zeroSignificand();
3229 Val.significandParts()[partCountForBits(Sem.precision)-1]
3230 |= (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1));
3235 APFloat::APFloat(const APInt& api, bool isIEEE)
3237 initFromAPInt(api, isIEEE);
3240 APFloat::APFloat(float f)
3242 APInt api = APInt(32, 0);
3243 initFromAPInt(api.floatToBits(f));
3246 APFloat::APFloat(double d)
3248 APInt api = APInt(64, 0);
3249 initFromAPInt(api.doubleToBits(d));
3253 static void append(SmallVectorImpl<char> &Buffer,
3254 unsigned N, const char *Str) {
3255 unsigned Start = Buffer.size();
3256 Buffer.set_size(Start + N);
3257 memcpy(&Buffer[Start], Str, N);
3260 template <unsigned N>
3261 void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
3262 append(Buffer, N, Str);
3265 /// Removes data from the given significand until it is no more
3266 /// precise than is required for the desired precision.
3267 void AdjustToPrecision(APInt &significand,
3268 int &exp, unsigned FormatPrecision) {
3269 unsigned bits = significand.getActiveBits();
3271 // 196/59 is a very slight overestimate of lg_2(10).
3272 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3274 if (bits <= bitsRequired) return;
3276 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3277 if (!tensRemovable) return;
3279 exp += tensRemovable;
3281 APInt divisor(significand.getBitWidth(), 1);
3282 APInt powten(significand.getBitWidth(), 10);
3284 if (tensRemovable & 1)
3286 tensRemovable >>= 1;
3287 if (!tensRemovable) break;
3291 significand = significand.udiv(divisor);
3293 // Truncate the significand down to its active bit count, but
3294 // don't try to drop below 32.
3295 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3296 significand.trunc(newPrecision);
3300 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3301 int &exp, unsigned FormatPrecision) {
3302 unsigned N = buffer.size();
3303 if (N <= FormatPrecision) return;
3305 // The most significant figures are the last ones in the buffer.
3306 unsigned FirstSignificant = N - FormatPrecision;
3309 // FIXME: this probably shouldn't use 'round half up'.
3311 // Rounding down is just a truncation, except we also want to drop
3312 // trailing zeros from the new result.
3313 if (buffer[FirstSignificant - 1] < '5') {
3314 while (buffer[FirstSignificant] == '0')
3317 exp += FirstSignificant;
3318 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3322 // Rounding up requires a decimal add-with-carry. If we continue
3323 // the carry, the newly-introduced zeros will just be truncated.
3324 for (unsigned I = FirstSignificant; I != N; ++I) {
3325 if (buffer[I] == '9') {
3333 // If we carried through, we have exactly one digit of precision.
3334 if (FirstSignificant == N) {
3335 exp += FirstSignificant;
3337 buffer.push_back('1');
3341 exp += FirstSignificant;
3342 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3346 void APFloat::toString(SmallVectorImpl<char> &Str,
3347 unsigned FormatPrecision,
3348 unsigned FormatMaxPadding) {
3352 return append(Str, "-Inf");
3354 return append(Str, "+Inf");
3356 case fcNaN: return append(Str, "NaN");
3362 if (!FormatMaxPadding)
3363 append(Str, "0.0E+0");
3375 // Decompose the number into an APInt and an exponent.
3376 int exp = exponent - ((int) semantics->precision - 1);
3377 APInt significand(semantics->precision,
3378 partCountForBits(semantics->precision),
3379 significandParts());
3381 // Set FormatPrecision if zero. We want to do this before we
3382 // truncate trailing zeros, as those are part of the precision.
3383 if (!FormatPrecision) {
3384 // It's an interesting question whether to use the nominal
3385 // precision or the active precision here for denormals.
3387 // FormatPrecision = ceil(significandBits / lg_2(10))
3388 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3391 // Ignore trailing binary zeros.
3392 int trailingZeros = significand.countTrailingZeros();
3393 exp += trailingZeros;
3394 significand = significand.lshr(trailingZeros);
3396 // Change the exponent from 2^e to 10^e.
3399 } else if (exp > 0) {
3401 significand.zext(semantics->precision + exp);
3402 significand <<= exp;
3404 } else { /* exp < 0 */
3407 // We transform this using the identity:
3408 // (N)(2^-e) == (N)(5^e)(10^-e)
3409 // This means we have to multiply N (the significand) by 5^e.
3410 // To avoid overflow, we have to operate on numbers large
3411 // enough to store N * 5^e:
3412 // log2(N * 5^e) == log2(N) + e * log2(5)
3413 // <= semantics->precision + e * 137 / 59
3414 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3416 unsigned precision = semantics->precision + 137 * texp / 59;
3418 // Multiply significand by 5^e.
3419 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3420 significand.zext(precision);
3421 APInt five_to_the_i(precision, 5);
3423 if (texp & 1) significand *= five_to_the_i;
3427 five_to_the_i *= five_to_the_i;
3431 AdjustToPrecision(significand, exp, FormatPrecision);
3433 llvm::SmallVector<char, 256> buffer;
3436 unsigned precision = significand.getBitWidth();
3437 APInt ten(precision, 10);
3438 APInt digit(precision, 0);
3440 bool inTrail = true;
3441 while (significand != 0) {
3442 // digit <- significand % 10
3443 // significand <- significand / 10
3444 APInt::udivrem(significand, ten, significand, digit);
3446 unsigned d = digit.getZExtValue();
3448 // Drop trailing zeros.
3449 if (inTrail && !d) exp++;
3451 buffer.push_back((char) ('0' + d));
3456 assert(!buffer.empty() && "no characters in buffer!");
3458 // Drop down to FormatPrecision.
3459 // TODO: don't do more precise calculations above than are required.
3460 AdjustToPrecision(buffer, exp, FormatPrecision);
3462 unsigned NDigits = buffer.size();
3464 // Check whether we should use scientific notation.
3465 bool FormatScientific;
3466 if (!FormatMaxPadding)
3467 FormatScientific = true;
3472 // But we shouldn't make the number look more precise than it is.
3473 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3474 NDigits + (unsigned) exp > FormatPrecision);
3476 // Power of the most significant digit.
3477 int MSD = exp + (int) (NDigits - 1);
3480 FormatScientific = false;
3482 // 765e-5 == 0.00765
3484 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3489 // Scientific formatting is pretty straightforward.
3490 if (FormatScientific) {
3491 exp += (NDigits - 1);
3493 Str.push_back(buffer[NDigits-1]);
3498 for (unsigned I = 1; I != NDigits; ++I)
3499 Str.push_back(buffer[NDigits-1-I]);
3502 Str.push_back(exp >= 0 ? '+' : '-');
3503 if (exp < 0) exp = -exp;
3504 SmallVector<char, 6> expbuf;
3506 expbuf.push_back((char) ('0' + (exp % 10)));
3509 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3510 Str.push_back(expbuf[E-1-I]);
3514 // Non-scientific, positive exponents.
3516 for (unsigned I = 0; I != NDigits; ++I)
3517 Str.push_back(buffer[NDigits-1-I]);
3518 for (unsigned I = 0; I != (unsigned) exp; ++I)
3523 // Non-scientific, negative exponents.
3525 // The number of digits to the left of the decimal point.
3526 int NWholeDigits = exp + (int) NDigits;
3529 if (NWholeDigits > 0) {
3530 for (; I != (unsigned) NWholeDigits; ++I)
3531 Str.push_back(buffer[NDigits-I-1]);
3534 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3538 for (unsigned Z = 1; Z != NZeros; ++Z)
3542 for (; I != NDigits; ++I)
3543 Str.push_back(buffer[NDigits-I-1]);