1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
27 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
31 /* Represents floating point arithmetic semantics. */
33 /* The largest E such that 2^E is representable; this matches the
34 definition of IEEE 754. */
35 exponent_t maxExponent;
37 /* The smallest E such that 2^E is a normalized number; this
38 matches the definition of IEEE 754. */
39 exponent_t minExponent;
41 /* Number of bits in the significand. This includes the integer
43 unsigned int precision;
45 /* True if arithmetic is supported. */
46 unsigned int arithmeticOK;
49 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
50 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
51 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
52 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
53 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
55 // The PowerPC format consists of two doubles. It does not map cleanly
56 // onto the usual format above. For now only storage of constants of
57 // this type is supported, no arithmetic.
58 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
60 /* A tight upper bound on number of parts required to hold the value
63 power * 815 / (351 * integerPartWidth) + 1
65 However, whilst the result may require only this many parts,
66 because we are multiplying two values to get it, the
67 multiplication may require an extra part with the excess part
68 being zero (consider the trivial case of 1 * 1, tcFullMultiply
69 requires two parts to hold the single-part result). So we add an
70 extra one to guarantee enough space whilst multiplying. */
71 const unsigned int maxExponent = 16383;
72 const unsigned int maxPrecision = 113;
73 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
74 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
75 / (351 * integerPartWidth));
78 /* A bunch of private, handy routines. */
80 static inline unsigned int
81 partCountForBits(unsigned int bits)
83 return ((bits) + integerPartWidth - 1) / integerPartWidth;
86 /* Returns 0U-9U. Return values >= 10U are not digits. */
87 static inline unsigned int
88 decDigitValue(unsigned int c)
94 hexDigitValue(unsigned int c)
114 assertArithmeticOK(const llvm::fltSemantics &semantics) {
115 assert(semantics.arithmeticOK
116 && "Compile-time arithmetic does not support these semantics");
119 /* Return the value of a decimal exponent of the form
122 If the exponent overflows, returns a large exponent with the
125 readExponent(const char *p)
128 unsigned int absExponent;
129 const unsigned int overlargeExponent = 24000; /* FIXME. */
131 isNegative = (*p == '-');
132 if (*p == '-' || *p == '+')
135 absExponent = decDigitValue(*p++);
136 assert (absExponent < 10U);
141 value = decDigitValue(*p);
146 value += absExponent * 10;
147 if (absExponent >= overlargeExponent) {
148 absExponent = overlargeExponent;
155 return -(int) absExponent;
157 return (int) absExponent;
160 /* This is ugly and needs cleaning up, but I don't immediately see
161 how whilst remaining safe. */
163 totalExponent(const char *p, int exponentAdjustment)
165 int unsignedExponent;
166 bool negative, overflow;
169 /* Move past the exponent letter and sign to the digits. */
171 negative = *p == '-';
172 if(*p == '-' || *p == '+')
175 unsignedExponent = 0;
180 value = decDigitValue(*p);
185 unsignedExponent = unsignedExponent * 10 + value;
186 if(unsignedExponent > 65535)
190 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
194 exponent = unsignedExponent;
196 exponent = -exponent;
197 exponent += exponentAdjustment;
198 if(exponent > 65535 || exponent < -65536)
203 exponent = negative ? -65536: 65535;
209 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
234 If the value is zero, V->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
238 const char *firstSigDigit;
239 const char *lastSigDigit;
241 int normalizedExponent;
245 interpretDecimal(const char *p, decimalInfo *D)
249 p = skipLeadingZeroesAndAnyDot (p, &dot);
251 D->firstSigDigit = p;
253 D->normalizedExponent = 0;
260 if (decDigitValue(*p) >= 10U)
265 /* If number is all zerooes accept any exponent. */
266 if (p != D->firstSigDigit) {
267 if (*p == 'e' || *p == 'E')
268 D->exponent = readExponent(p + 1);
270 /* Implied decimal point? */
274 /* Drop insignificant trailing zeroes. */
281 /* Adjust the exponents for any decimal point. */
282 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
283 D->normalizedExponent = (D->exponent +
284 static_cast<exponent_t>((p - D->firstSigDigit)
285 - (dot > D->firstSigDigit && dot < p)));
291 /* Return the trailing fraction of a hexadecimal number.
292 DIGITVALUE is the first hex digit of the fraction, P points to
295 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
297 unsigned int hexDigit;
299 /* If the first trailing digit isn't 0 or 8 we can work out the
300 fraction immediately. */
302 return lfMoreThanHalf;
303 else if(digitValue < 8 && digitValue > 0)
304 return lfLessThanHalf;
306 /* Otherwise we need to find the first non-zero digit. */
310 hexDigit = hexDigitValue(*p);
312 /* If we ran off the end it is exactly zero or one-half, otherwise
315 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
317 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
320 /* Return the fraction lost were a bignum truncated losing the least
321 significant BITS bits. */
323 lostFractionThroughTruncation(const integerPart *parts,
324 unsigned int partCount,
329 lsb = APInt::tcLSB(parts, partCount);
331 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
333 return lfExactlyZero;
335 return lfExactlyHalf;
336 if(bits <= partCount * integerPartWidth
337 && APInt::tcExtractBit(parts, bits - 1))
338 return lfMoreThanHalf;
340 return lfLessThanHalf;
343 /* Shift DST right BITS bits noting lost fraction. */
345 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
347 lostFraction lost_fraction;
349 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
351 APInt::tcShiftRight(dst, parts, bits);
353 return lost_fraction;
356 /* Combine the effect of two lost fractions. */
358 combineLostFractions(lostFraction moreSignificant,
359 lostFraction lessSignificant)
361 if(lessSignificant != lfExactlyZero) {
362 if(moreSignificant == lfExactlyZero)
363 moreSignificant = lfLessThanHalf;
364 else if(moreSignificant == lfExactlyHalf)
365 moreSignificant = lfMoreThanHalf;
368 return moreSignificant;
371 /* The error from the true value, in half-ulps, on multiplying two
372 floating point numbers, which differ from the value they
373 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
374 than the returned value.
376 See "How to Read Floating Point Numbers Accurately" by William D
379 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
381 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
383 if (HUerr1 + HUerr2 == 0)
384 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
386 return inexactMultiply + 2 * (HUerr1 + HUerr2);
389 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
390 when the least significant BITS are truncated. BITS cannot be
393 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
395 unsigned int count, partBits;
396 integerPart part, boundary;
401 count = bits / integerPartWidth;
402 partBits = bits % integerPartWidth + 1;
404 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
407 boundary = (integerPart) 1 << (partBits - 1);
412 if (part - boundary <= boundary - part)
413 return part - boundary;
415 return boundary - part;
418 if (part == boundary) {
421 return ~(integerPart) 0; /* A lot. */
424 } else if (part == boundary - 1) {
427 return ~(integerPart) 0; /* A lot. */
432 return ~(integerPart) 0; /* A lot. */
435 /* Place pow(5, power) in DST, and return the number of parts used.
436 DST must be at least one part larger than size of the answer. */
438 powerOf5(integerPart *dst, unsigned int power)
440 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
442 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
443 pow5s[0] = 78125 * 5;
445 unsigned int partsCount[16] = { 1 };
446 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
448 assert(power <= maxExponent);
453 *p1 = firstEightPowers[power & 7];
459 for (unsigned int n = 0; power; power >>= 1, n++) {
464 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
466 pc = partsCount[n - 1];
467 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
469 if (pow5[pc - 1] == 0)
477 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
479 if (p2[result - 1] == 0)
482 /* Now result is in p1 with partsCount parts and p2 is scratch
484 tmp = p1, p1 = p2, p2 = tmp;
491 APInt::tcAssign(dst, p1, result);
496 /* Zero at the end to avoid modular arithmetic when adding one; used
497 when rounding up during hexadecimal output. */
498 static const char hexDigitsLower[] = "0123456789abcdef0";
499 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
500 static const char infinityL[] = "infinity";
501 static const char infinityU[] = "INFINITY";
502 static const char NaNL[] = "nan";
503 static const char NaNU[] = "NAN";
505 /* Write out an integerPart in hexadecimal, starting with the most
506 significant nibble. Write out exactly COUNT hexdigits, return
509 partAsHex (char *dst, integerPart part, unsigned int count,
510 const char *hexDigitChars)
512 unsigned int result = count;
514 assert (count != 0 && count <= integerPartWidth / 4);
516 part >>= (integerPartWidth - 4 * count);
518 dst[count] = hexDigitChars[part & 0xf];
525 /* Write out an unsigned decimal integer. */
527 writeUnsignedDecimal (char *dst, unsigned int n)
543 /* Write out a signed decimal integer. */
545 writeSignedDecimal (char *dst, int value)
549 dst = writeUnsignedDecimal(dst, -(unsigned) value);
551 dst = writeUnsignedDecimal(dst, value);
558 APFloat::initialize(const fltSemantics *ourSemantics)
562 semantics = ourSemantics;
565 significand.parts = new integerPart[count];
569 APFloat::freeSignificand()
572 delete [] significand.parts;
576 APFloat::assign(const APFloat &rhs)
578 assert(semantics == rhs.semantics);
581 category = rhs.category;
582 exponent = rhs.exponent;
584 exponent2 = rhs.exponent2;
585 if(category == fcNormal || category == fcNaN)
586 copySignificand(rhs);
590 APFloat::copySignificand(const APFloat &rhs)
592 assert(category == fcNormal || category == fcNaN);
593 assert(rhs.partCount() >= partCount());
595 APInt::tcAssign(significandParts(), rhs.significandParts(),
599 /* Make this number a NaN, with an arbitrary but deterministic value
600 for the significand. If double or longer, this is a signalling NaN,
601 which may not be ideal. If float, this is QNaN(0). */
603 APFloat::makeNaN(unsigned type)
606 // FIXME: Add double and long double support for QNaN(0).
607 if (semantics->precision == 24 && semantics->maxExponent == 127) {
609 type &= ~0x80000000U;
612 APInt::tcSet(significandParts(), type, partCount());
616 APFloat::operator=(const APFloat &rhs)
619 if(semantics != rhs.semantics) {
621 initialize(rhs.semantics);
630 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
633 if (semantics != rhs.semantics ||
634 category != rhs.category ||
637 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
640 if (category==fcZero || category==fcInfinity)
642 else if (category==fcNormal && exponent!=rhs.exponent)
644 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
645 exponent2!=rhs.exponent2)
649 const integerPart* p=significandParts();
650 const integerPart* q=rhs.significandParts();
651 for (; i>0; i--, p++, q++) {
659 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
661 assertArithmeticOK(ourSemantics);
662 initialize(&ourSemantics);
665 exponent = ourSemantics.precision - 1;
666 significandParts()[0] = value;
667 normalize(rmNearestTiesToEven, lfExactlyZero);
670 APFloat::APFloat(const fltSemantics &ourSemantics,
671 fltCategory ourCategory, bool negative, unsigned type)
673 assertArithmeticOK(ourSemantics);
674 initialize(&ourSemantics);
675 category = ourCategory;
677 if (category == fcNormal)
679 else if (ourCategory == fcNaN)
683 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
685 assertArithmeticOK(ourSemantics);
686 initialize(&ourSemantics);
687 convertFromString(text, rmNearestTiesToEven);
690 APFloat::APFloat(const APFloat &rhs)
692 initialize(rhs.semantics);
701 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
702 void APFloat::Profile(FoldingSetNodeID& ID) const {
703 ID.Add(bitcastToAPInt());
707 APFloat::partCount() const
709 return partCountForBits(semantics->precision + 1);
713 APFloat::semanticsPrecision(const fltSemantics &semantics)
715 return semantics.precision;
719 APFloat::significandParts() const
721 return const_cast<APFloat *>(this)->significandParts();
725 APFloat::significandParts()
727 assert(category == fcNormal || category == fcNaN);
730 return significand.parts;
732 return &significand.part;
736 APFloat::zeroSignificand()
739 APInt::tcSet(significandParts(), 0, partCount());
742 /* Increment an fcNormal floating point number's significand. */
744 APFloat::incrementSignificand()
748 carry = APInt::tcIncrement(significandParts(), partCount());
750 /* Our callers should never cause us to overflow. */
754 /* Add the significand of the RHS. Returns the carry flag. */
756 APFloat::addSignificand(const APFloat &rhs)
760 parts = significandParts();
762 assert(semantics == rhs.semantics);
763 assert(exponent == rhs.exponent);
765 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
768 /* Subtract the significand of the RHS with a borrow flag. Returns
771 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
775 parts = significandParts();
777 assert(semantics == rhs.semantics);
778 assert(exponent == rhs.exponent);
780 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
784 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
785 on to the full-precision result of the multiplication. Returns the
788 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
790 unsigned int omsb; // One, not zero, based MSB.
791 unsigned int partsCount, newPartsCount, precision;
792 integerPart *lhsSignificand;
793 integerPart scratch[4];
794 integerPart *fullSignificand;
795 lostFraction lost_fraction;
798 assert(semantics == rhs.semantics);
800 precision = semantics->precision;
801 newPartsCount = partCountForBits(precision * 2);
803 if(newPartsCount > 4)
804 fullSignificand = new integerPart[newPartsCount];
806 fullSignificand = scratch;
808 lhsSignificand = significandParts();
809 partsCount = partCount();
811 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
812 rhs.significandParts(), partsCount, partsCount);
814 lost_fraction = lfExactlyZero;
815 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
816 exponent += rhs.exponent;
819 Significand savedSignificand = significand;
820 const fltSemantics *savedSemantics = semantics;
821 fltSemantics extendedSemantics;
823 unsigned int extendedPrecision;
825 /* Normalize our MSB. */
826 extendedPrecision = precision + precision - 1;
827 if(omsb != extendedPrecision)
829 APInt::tcShiftLeft(fullSignificand, newPartsCount,
830 extendedPrecision - omsb);
831 exponent -= extendedPrecision - omsb;
834 /* Create new semantics. */
835 extendedSemantics = *semantics;
836 extendedSemantics.precision = extendedPrecision;
838 if(newPartsCount == 1)
839 significand.part = fullSignificand[0];
841 significand.parts = fullSignificand;
842 semantics = &extendedSemantics;
844 APFloat extendedAddend(*addend);
845 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
846 assert(status == opOK);
847 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
849 /* Restore our state. */
850 if(newPartsCount == 1)
851 fullSignificand[0] = significand.part;
852 significand = savedSignificand;
853 semantics = savedSemantics;
855 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
858 exponent -= (precision - 1);
860 if(omsb > precision) {
861 unsigned int bits, significantParts;
864 bits = omsb - precision;
865 significantParts = partCountForBits(omsb);
866 lf = shiftRight(fullSignificand, significantParts, bits);
867 lost_fraction = combineLostFractions(lf, lost_fraction);
871 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
873 if(newPartsCount > 4)
874 delete [] fullSignificand;
876 return lost_fraction;
879 /* Multiply the significands of LHS and RHS to DST. */
881 APFloat::divideSignificand(const APFloat &rhs)
883 unsigned int bit, i, partsCount;
884 const integerPart *rhsSignificand;
885 integerPart *lhsSignificand, *dividend, *divisor;
886 integerPart scratch[4];
887 lostFraction lost_fraction;
889 assert(semantics == rhs.semantics);
891 lhsSignificand = significandParts();
892 rhsSignificand = rhs.significandParts();
893 partsCount = partCount();
896 dividend = new integerPart[partsCount * 2];
900 divisor = dividend + partsCount;
902 /* Copy the dividend and divisor as they will be modified in-place. */
903 for(i = 0; i < partsCount; i++) {
904 dividend[i] = lhsSignificand[i];
905 divisor[i] = rhsSignificand[i];
906 lhsSignificand[i] = 0;
909 exponent -= rhs.exponent;
911 unsigned int precision = semantics->precision;
913 /* Normalize the divisor. */
914 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
917 APInt::tcShiftLeft(divisor, partsCount, bit);
920 /* Normalize the dividend. */
921 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
924 APInt::tcShiftLeft(dividend, partsCount, bit);
927 /* Ensure the dividend >= divisor initially for the loop below.
928 Incidentally, this means that the division loop below is
929 guaranteed to set the integer bit to one. */
930 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
932 APInt::tcShiftLeft(dividend, partsCount, 1);
933 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
937 for(bit = precision; bit; bit -= 1) {
938 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
939 APInt::tcSubtract(dividend, divisor, 0, partsCount);
940 APInt::tcSetBit(lhsSignificand, bit - 1);
943 APInt::tcShiftLeft(dividend, partsCount, 1);
946 /* Figure out the lost fraction. */
947 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
950 lost_fraction = lfMoreThanHalf;
952 lost_fraction = lfExactlyHalf;
953 else if(APInt::tcIsZero(dividend, partsCount))
954 lost_fraction = lfExactlyZero;
956 lost_fraction = lfLessThanHalf;
961 return lost_fraction;
965 APFloat::significandMSB() const
967 return APInt::tcMSB(significandParts(), partCount());
971 APFloat::significandLSB() const
973 return APInt::tcLSB(significandParts(), partCount());
976 /* Note that a zero result is NOT normalized to fcZero. */
978 APFloat::shiftSignificandRight(unsigned int bits)
980 /* Our exponent should not overflow. */
981 assert((exponent_t) (exponent + bits) >= exponent);
985 return shiftRight(significandParts(), partCount(), bits);
988 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
990 APFloat::shiftSignificandLeft(unsigned int bits)
992 assert(bits < semantics->precision);
995 unsigned int partsCount = partCount();
997 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1000 assert(!APInt::tcIsZero(significandParts(), partsCount));
1005 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1009 assert(semantics == rhs.semantics);
1010 assert(category == fcNormal);
1011 assert(rhs.category == fcNormal);
1013 compare = exponent - rhs.exponent;
1015 /* If exponents are equal, do an unsigned bignum comparison of the
1018 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1022 return cmpGreaterThan;
1023 else if(compare < 0)
1029 /* Handle overflow. Sign is preserved. We either become infinity or
1030 the largest finite number. */
1032 APFloat::handleOverflow(roundingMode rounding_mode)
1035 if(rounding_mode == rmNearestTiesToEven
1036 || rounding_mode == rmNearestTiesToAway
1037 || (rounding_mode == rmTowardPositive && !sign)
1038 || (rounding_mode == rmTowardNegative && sign))
1040 category = fcInfinity;
1041 return (opStatus) (opOverflow | opInexact);
1044 /* Otherwise we become the largest finite number. */
1045 category = fcNormal;
1046 exponent = semantics->maxExponent;
1047 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1048 semantics->precision);
1053 /* Returns TRUE if, when truncating the current number, with BIT the
1054 new LSB, with the given lost fraction and rounding mode, the result
1055 would need to be rounded away from zero (i.e., by increasing the
1056 signficand). This routine must work for fcZero of both signs, and
1057 fcNormal numbers. */
1059 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1060 lostFraction lost_fraction,
1061 unsigned int bit) const
1063 /* NaNs and infinities should not have lost fractions. */
1064 assert(category == fcNormal || category == fcZero);
1066 /* Current callers never pass this so we don't handle it. */
1067 assert(lost_fraction != lfExactlyZero);
1069 switch (rounding_mode) {
1073 case rmNearestTiesToAway:
1074 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1076 case rmNearestTiesToEven:
1077 if(lost_fraction == lfMoreThanHalf)
1080 /* Our zeroes don't have a significand to test. */
1081 if(lost_fraction == lfExactlyHalf && category != fcZero)
1082 return APInt::tcExtractBit(significandParts(), bit);
1089 case rmTowardPositive:
1090 return sign == false;
1092 case rmTowardNegative:
1093 return sign == true;
1098 APFloat::normalize(roundingMode rounding_mode,
1099 lostFraction lost_fraction)
1101 unsigned int omsb; /* One, not zero, based MSB. */
1104 if(category != fcNormal)
1107 /* Before rounding normalize the exponent of fcNormal numbers. */
1108 omsb = significandMSB() + 1;
1111 /* OMSB is numbered from 1. We want to place it in the integer
1112 bit numbered PRECISON if possible, with a compensating change in
1114 exponentChange = omsb - semantics->precision;
1116 /* If the resulting exponent is too high, overflow according to
1117 the rounding mode. */
1118 if(exponent + exponentChange > semantics->maxExponent)
1119 return handleOverflow(rounding_mode);
1121 /* Subnormal numbers have exponent minExponent, and their MSB
1122 is forced based on that. */
1123 if(exponent + exponentChange < semantics->minExponent)
1124 exponentChange = semantics->minExponent - exponent;
1126 /* Shifting left is easy as we don't lose precision. */
1127 if(exponentChange < 0) {
1128 assert(lost_fraction == lfExactlyZero);
1130 shiftSignificandLeft(-exponentChange);
1135 if(exponentChange > 0) {
1138 /* Shift right and capture any new lost fraction. */
1139 lf = shiftSignificandRight(exponentChange);
1141 lost_fraction = combineLostFractions(lf, lost_fraction);
1143 /* Keep OMSB up-to-date. */
1144 if(omsb > (unsigned) exponentChange)
1145 omsb -= exponentChange;
1151 /* Now round the number according to rounding_mode given the lost
1154 /* As specified in IEEE 754, since we do not trap we do not report
1155 underflow for exact results. */
1156 if(lost_fraction == lfExactlyZero) {
1157 /* Canonicalize zeroes. */
1164 /* Increment the significand if we're rounding away from zero. */
1165 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1167 exponent = semantics->minExponent;
1169 incrementSignificand();
1170 omsb = significandMSB() + 1;
1172 /* Did the significand increment overflow? */
1173 if(omsb == (unsigned) semantics->precision + 1) {
1174 /* Renormalize by incrementing the exponent and shifting our
1175 significand right one. However if we already have the
1176 maximum exponent we overflow to infinity. */
1177 if(exponent == semantics->maxExponent) {
1178 category = fcInfinity;
1180 return (opStatus) (opOverflow | opInexact);
1183 shiftSignificandRight(1);
1189 /* The normal case - we were and are not denormal, and any
1190 significand increment above didn't overflow. */
1191 if(omsb == semantics->precision)
1194 /* We have a non-zero denormal. */
1195 assert(omsb < semantics->precision);
1197 /* Canonicalize zeroes. */
1201 /* The fcZero case is a denormal that underflowed to zero. */
1202 return (opStatus) (opUnderflow | opInexact);
1206 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1208 switch (convolve(category, rhs.category)) {
1212 case convolve(fcNaN, fcZero):
1213 case convolve(fcNaN, fcNormal):
1214 case convolve(fcNaN, fcInfinity):
1215 case convolve(fcNaN, fcNaN):
1216 case convolve(fcNormal, fcZero):
1217 case convolve(fcInfinity, fcNormal):
1218 case convolve(fcInfinity, fcZero):
1221 case convolve(fcZero, fcNaN):
1222 case convolve(fcNormal, fcNaN):
1223 case convolve(fcInfinity, fcNaN):
1225 copySignificand(rhs);
1228 case convolve(fcNormal, fcInfinity):
1229 case convolve(fcZero, fcInfinity):
1230 category = fcInfinity;
1231 sign = rhs.sign ^ subtract;
1234 case convolve(fcZero, fcNormal):
1236 sign = rhs.sign ^ subtract;
1239 case convolve(fcZero, fcZero):
1240 /* Sign depends on rounding mode; handled by caller. */
1243 case convolve(fcInfinity, fcInfinity):
1244 /* Differently signed infinities can only be validly
1246 if(((sign ^ rhs.sign)!=0) != subtract) {
1253 case convolve(fcNormal, fcNormal):
1258 /* Add or subtract two normal numbers. */
1260 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1263 lostFraction lost_fraction;
1266 /* Determine if the operation on the absolute values is effectively
1267 an addition or subtraction. */
1268 subtract ^= (sign ^ rhs.sign) ? true : false;
1270 /* Are we bigger exponent-wise than the RHS? */
1271 bits = exponent - rhs.exponent;
1273 /* Subtraction is more subtle than one might naively expect. */
1275 APFloat temp_rhs(rhs);
1279 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1280 lost_fraction = lfExactlyZero;
1281 } else if (bits > 0) {
1282 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1283 shiftSignificandLeft(1);
1286 lost_fraction = shiftSignificandRight(-bits - 1);
1287 temp_rhs.shiftSignificandLeft(1);
1292 carry = temp_rhs.subtractSignificand
1293 (*this, lost_fraction != lfExactlyZero);
1294 copySignificand(temp_rhs);
1297 carry = subtractSignificand
1298 (temp_rhs, lost_fraction != lfExactlyZero);
1301 /* Invert the lost fraction - it was on the RHS and
1303 if(lost_fraction == lfLessThanHalf)
1304 lost_fraction = lfMoreThanHalf;
1305 else if(lost_fraction == lfMoreThanHalf)
1306 lost_fraction = lfLessThanHalf;
1308 /* The code above is intended to ensure that no borrow is
1313 APFloat temp_rhs(rhs);
1315 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1316 carry = addSignificand(temp_rhs);
1318 lost_fraction = shiftSignificandRight(-bits);
1319 carry = addSignificand(rhs);
1322 /* We have a guard bit; generating a carry cannot happen. */
1326 return lost_fraction;
1330 APFloat::multiplySpecials(const APFloat &rhs)
1332 switch (convolve(category, rhs.category)) {
1336 case convolve(fcNaN, fcZero):
1337 case convolve(fcNaN, fcNormal):
1338 case convolve(fcNaN, fcInfinity):
1339 case convolve(fcNaN, fcNaN):
1342 case convolve(fcZero, fcNaN):
1343 case convolve(fcNormal, fcNaN):
1344 case convolve(fcInfinity, fcNaN):
1346 copySignificand(rhs);
1349 case convolve(fcNormal, fcInfinity):
1350 case convolve(fcInfinity, fcNormal):
1351 case convolve(fcInfinity, fcInfinity):
1352 category = fcInfinity;
1355 case convolve(fcZero, fcNormal):
1356 case convolve(fcNormal, fcZero):
1357 case convolve(fcZero, fcZero):
1361 case convolve(fcZero, fcInfinity):
1362 case convolve(fcInfinity, fcZero):
1366 case convolve(fcNormal, fcNormal):
1372 APFloat::divideSpecials(const APFloat &rhs)
1374 switch (convolve(category, rhs.category)) {
1378 case convolve(fcNaN, fcZero):
1379 case convolve(fcNaN, fcNormal):
1380 case convolve(fcNaN, fcInfinity):
1381 case convolve(fcNaN, fcNaN):
1382 case convolve(fcInfinity, fcZero):
1383 case convolve(fcInfinity, fcNormal):
1384 case convolve(fcZero, fcInfinity):
1385 case convolve(fcZero, fcNormal):
1388 case convolve(fcZero, fcNaN):
1389 case convolve(fcNormal, fcNaN):
1390 case convolve(fcInfinity, fcNaN):
1392 copySignificand(rhs);
1395 case convolve(fcNormal, fcInfinity):
1399 case convolve(fcNormal, fcZero):
1400 category = fcInfinity;
1403 case convolve(fcInfinity, fcInfinity):
1404 case convolve(fcZero, fcZero):
1408 case convolve(fcNormal, fcNormal):
1414 APFloat::modSpecials(const APFloat &rhs)
1416 switch (convolve(category, rhs.category)) {
1420 case convolve(fcNaN, fcZero):
1421 case convolve(fcNaN, fcNormal):
1422 case convolve(fcNaN, fcInfinity):
1423 case convolve(fcNaN, fcNaN):
1424 case convolve(fcZero, fcInfinity):
1425 case convolve(fcZero, fcNormal):
1426 case convolve(fcNormal, fcInfinity):
1429 case convolve(fcZero, fcNaN):
1430 case convolve(fcNormal, fcNaN):
1431 case convolve(fcInfinity, fcNaN):
1433 copySignificand(rhs);
1436 case convolve(fcNormal, fcZero):
1437 case convolve(fcInfinity, fcZero):
1438 case convolve(fcInfinity, fcNormal):
1439 case convolve(fcInfinity, fcInfinity):
1440 case convolve(fcZero, fcZero):
1444 case convolve(fcNormal, fcNormal):
1451 APFloat::changeSign()
1453 /* Look mummy, this one's easy. */
1458 APFloat::clearSign()
1460 /* So is this one. */
1465 APFloat::copySign(const APFloat &rhs)
1471 /* Normalized addition or subtraction. */
1473 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1478 assertArithmeticOK(*semantics);
1480 fs = addOrSubtractSpecials(rhs, subtract);
1482 /* This return code means it was not a simple case. */
1483 if(fs == opDivByZero) {
1484 lostFraction lost_fraction;
1486 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1487 fs = normalize(rounding_mode, lost_fraction);
1489 /* Can only be zero if we lost no fraction. */
1490 assert(category != fcZero || lost_fraction == lfExactlyZero);
1493 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1494 positive zero unless rounding to minus infinity, except that
1495 adding two like-signed zeroes gives that zero. */
1496 if(category == fcZero) {
1497 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1498 sign = (rounding_mode == rmTowardNegative);
1504 /* Normalized addition. */
1506 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1508 return addOrSubtract(rhs, rounding_mode, false);
1511 /* Normalized subtraction. */
1513 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1515 return addOrSubtract(rhs, rounding_mode, true);
1518 /* Normalized multiply. */
1520 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1524 assertArithmeticOK(*semantics);
1526 fs = multiplySpecials(rhs);
1528 if(category == fcNormal) {
1529 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1530 fs = normalize(rounding_mode, lost_fraction);
1531 if(lost_fraction != lfExactlyZero)
1532 fs = (opStatus) (fs | opInexact);
1538 /* Normalized divide. */
1540 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1544 assertArithmeticOK(*semantics);
1546 fs = divideSpecials(rhs);
1548 if(category == fcNormal) {
1549 lostFraction lost_fraction = divideSignificand(rhs);
1550 fs = normalize(rounding_mode, lost_fraction);
1551 if(lost_fraction != lfExactlyZero)
1552 fs = (opStatus) (fs | opInexact);
1558 /* Normalized remainder. This is not currently correct in all cases. */
1560 APFloat::remainder(const APFloat &rhs)
1564 unsigned int origSign = sign;
1566 assertArithmeticOK(*semantics);
1567 fs = V.divide(rhs, rmNearestTiesToEven);
1568 if (fs == opDivByZero)
1571 int parts = partCount();
1572 integerPart *x = new integerPart[parts];
1574 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1575 rmNearestTiesToEven, &ignored);
1576 if (fs==opInvalidOp)
1579 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1580 rmNearestTiesToEven);
1581 assert(fs==opOK); // should always work
1583 fs = V.multiply(rhs, rmNearestTiesToEven);
1584 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1586 fs = subtract(V, rmNearestTiesToEven);
1587 assert(fs==opOK || fs==opInexact); // likewise
1590 sign = origSign; // IEEE754 requires this
1595 /* Normalized llvm frem (C fmod).
1596 This is not currently correct in all cases. */
1598 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1601 assertArithmeticOK(*semantics);
1602 fs = modSpecials(rhs);
1604 if (category == fcNormal && rhs.category == fcNormal) {
1606 unsigned int origSign = sign;
1608 fs = V.divide(rhs, rmNearestTiesToEven);
1609 if (fs == opDivByZero)
1612 int parts = partCount();
1613 integerPart *x = new integerPart[parts];
1615 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1616 rmTowardZero, &ignored);
1617 if (fs==opInvalidOp)
1620 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1621 rmNearestTiesToEven);
1622 assert(fs==opOK); // should always work
1624 fs = V.multiply(rhs, rounding_mode);
1625 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1627 fs = subtract(V, rounding_mode);
1628 assert(fs==opOK || fs==opInexact); // likewise
1631 sign = origSign; // IEEE754 requires this
1637 /* Normalized fused-multiply-add. */
1639 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1640 const APFloat &addend,
1641 roundingMode rounding_mode)
1645 assertArithmeticOK(*semantics);
1647 /* Post-multiplication sign, before addition. */
1648 sign ^= multiplicand.sign;
1650 /* If and only if all arguments are normal do we need to do an
1651 extended-precision calculation. */
1652 if(category == fcNormal
1653 && multiplicand.category == fcNormal
1654 && addend.category == fcNormal) {
1655 lostFraction lost_fraction;
1657 lost_fraction = multiplySignificand(multiplicand, &addend);
1658 fs = normalize(rounding_mode, lost_fraction);
1659 if(lost_fraction != lfExactlyZero)
1660 fs = (opStatus) (fs | opInexact);
1662 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1663 positive zero unless rounding to minus infinity, except that
1664 adding two like-signed zeroes gives that zero. */
1665 if(category == fcZero && sign != addend.sign)
1666 sign = (rounding_mode == rmTowardNegative);
1668 fs = multiplySpecials(multiplicand);
1670 /* FS can only be opOK or opInvalidOp. There is no more work
1671 to do in the latter case. The IEEE-754R standard says it is
1672 implementation-defined in this case whether, if ADDEND is a
1673 quiet NaN, we raise invalid op; this implementation does so.
1675 If we need to do the addition we can do so with normal
1678 fs = addOrSubtract(addend, rounding_mode, false);
1684 /* Comparison requires normalized numbers. */
1686 APFloat::compare(const APFloat &rhs) const
1690 assertArithmeticOK(*semantics);
1691 assert(semantics == rhs.semantics);
1693 switch (convolve(category, rhs.category)) {
1697 case convolve(fcNaN, fcZero):
1698 case convolve(fcNaN, fcNormal):
1699 case convolve(fcNaN, fcInfinity):
1700 case convolve(fcNaN, fcNaN):
1701 case convolve(fcZero, fcNaN):
1702 case convolve(fcNormal, fcNaN):
1703 case convolve(fcInfinity, fcNaN):
1704 return cmpUnordered;
1706 case convolve(fcInfinity, fcNormal):
1707 case convolve(fcInfinity, fcZero):
1708 case convolve(fcNormal, fcZero):
1712 return cmpGreaterThan;
1714 case convolve(fcNormal, fcInfinity):
1715 case convolve(fcZero, fcInfinity):
1716 case convolve(fcZero, fcNormal):
1718 return cmpGreaterThan;
1722 case convolve(fcInfinity, fcInfinity):
1723 if(sign == rhs.sign)
1728 return cmpGreaterThan;
1730 case convolve(fcZero, fcZero):
1733 case convolve(fcNormal, fcNormal):
1737 /* Two normal numbers. Do they have the same sign? */
1738 if(sign != rhs.sign) {
1740 result = cmpLessThan;
1742 result = cmpGreaterThan;
1744 /* Compare absolute values; invert result if negative. */
1745 result = compareAbsoluteValue(rhs);
1748 if(result == cmpLessThan)
1749 result = cmpGreaterThan;
1750 else if(result == cmpGreaterThan)
1751 result = cmpLessThan;
1758 /// APFloat::convert - convert a value of one floating point type to another.
1759 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1760 /// records whether the transformation lost information, i.e. whether
1761 /// converting the result back to the original type will produce the
1762 /// original value (this is almost the same as return value==fsOK, but there
1763 /// are edge cases where this is not so).
1766 APFloat::convert(const fltSemantics &toSemantics,
1767 roundingMode rounding_mode, bool *losesInfo)
1769 lostFraction lostFraction;
1770 unsigned int newPartCount, oldPartCount;
1773 assertArithmeticOK(*semantics);
1774 assertArithmeticOK(toSemantics);
1775 lostFraction = lfExactlyZero;
1776 newPartCount = partCountForBits(toSemantics.precision + 1);
1777 oldPartCount = partCount();
1779 /* Handle storage complications. If our new form is wider,
1780 re-allocate our bit pattern into wider storage. If it is
1781 narrower, we ignore the excess parts, but if narrowing to a
1782 single part we need to free the old storage.
1783 Be careful not to reference significandParts for zeroes
1784 and infinities, since it aborts. */
1785 if (newPartCount > oldPartCount) {
1786 integerPart *newParts;
1787 newParts = new integerPart[newPartCount];
1788 APInt::tcSet(newParts, 0, newPartCount);
1789 if (category==fcNormal || category==fcNaN)
1790 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1792 significand.parts = newParts;
1793 } else if (newPartCount < oldPartCount) {
1794 /* Capture any lost fraction through truncation of parts so we get
1795 correct rounding whilst normalizing. */
1796 if (category==fcNormal)
1797 lostFraction = lostFractionThroughTruncation
1798 (significandParts(), oldPartCount, toSemantics.precision);
1799 if (newPartCount == 1) {
1800 integerPart newPart = 0;
1801 if (category==fcNormal || category==fcNaN)
1802 newPart = significandParts()[0];
1804 significand.part = newPart;
1808 if(category == fcNormal) {
1809 /* Re-interpret our bit-pattern. */
1810 exponent += toSemantics.precision - semantics->precision;
1811 semantics = &toSemantics;
1812 fs = normalize(rounding_mode, lostFraction);
1813 *losesInfo = (fs != opOK);
1814 } else if (category == fcNaN) {
1815 int shift = toSemantics.precision - semantics->precision;
1816 // Do this now so significandParts gets the right answer
1817 const fltSemantics *oldSemantics = semantics;
1818 semantics = &toSemantics;
1820 // No normalization here, just truncate
1822 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1823 else if (shift < 0) {
1824 unsigned ushift = -shift;
1825 // Figure out if we are losing information. This happens
1826 // if are shifting out something other than 0s, or if the x87 long
1827 // double input did not have its integer bit set (pseudo-NaN), or if the
1828 // x87 long double input did not have its QNan bit set (because the x87
1829 // hardware sets this bit when converting a lower-precision NaN to
1830 // x87 long double).
1831 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1833 if (oldSemantics == &APFloat::x87DoubleExtended &&
1834 (!(*significandParts() & 0x8000000000000000ULL) ||
1835 !(*significandParts() & 0x4000000000000000ULL)))
1837 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1839 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1840 // does not give you back the same bits. This is dubious, and we
1841 // don't currently do it. You're really supposed to get
1842 // an invalid operation signal at runtime, but nobody does that.
1845 semantics = &toSemantics;
1853 /* Convert a floating point number to an integer according to the
1854 rounding mode. If the rounded integer value is out of range this
1855 returns an invalid operation exception and the contents of the
1856 destination parts are unspecified. If the rounded value is in
1857 range but the floating point number is not the exact integer, the C
1858 standard doesn't require an inexact exception to be raised. IEEE
1859 854 does require it so we do that.
1861 Note that for conversions to integer type the C standard requires
1862 round-to-zero to always be used. */
1864 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1866 roundingMode rounding_mode,
1867 bool *isExact) const
1869 lostFraction lost_fraction;
1870 const integerPart *src;
1871 unsigned int dstPartsCount, truncatedBits;
1873 assertArithmeticOK(*semantics);
1877 /* Handle the three special cases first. */
1878 if(category == fcInfinity || category == fcNaN)
1881 dstPartsCount = partCountForBits(width);
1883 if(category == fcZero) {
1884 APInt::tcSet(parts, 0, dstPartsCount);
1885 // Negative zero can't be represented as an int.
1890 src = significandParts();
1892 /* Step 1: place our absolute value, with any fraction truncated, in
1895 /* Our absolute value is less than one; truncate everything. */
1896 APInt::tcSet(parts, 0, dstPartsCount);
1897 /* For exponent -1 the integer bit represents .5, look at that.
1898 For smaller exponents leftmost truncated bit is 0. */
1899 truncatedBits = semantics->precision -1U - exponent;
1901 /* We want the most significant (exponent + 1) bits; the rest are
1903 unsigned int bits = exponent + 1U;
1905 /* Hopelessly large in magnitude? */
1909 if (bits < semantics->precision) {
1910 /* We truncate (semantics->precision - bits) bits. */
1911 truncatedBits = semantics->precision - bits;
1912 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1914 /* We want at least as many bits as are available. */
1915 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1916 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1921 /* Step 2: work out any lost fraction, and increment the absolute
1922 value if we would round away from zero. */
1923 if (truncatedBits) {
1924 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1926 if (lost_fraction != lfExactlyZero
1927 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1928 if (APInt::tcIncrement(parts, dstPartsCount))
1929 return opInvalidOp; /* Overflow. */
1932 lost_fraction = lfExactlyZero;
1935 /* Step 3: check if we fit in the destination. */
1936 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1940 /* Negative numbers cannot be represented as unsigned. */
1944 /* It takes omsb bits to represent the unsigned integer value.
1945 We lose a bit for the sign, but care is needed as the
1946 maximally negative integer is a special case. */
1947 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1950 /* This case can happen because of rounding. */
1955 APInt::tcNegate (parts, dstPartsCount);
1957 if (omsb >= width + !isSigned)
1961 if (lost_fraction == lfExactlyZero) {
1968 /* Same as convertToSignExtendedInteger, except we provide
1969 deterministic values in case of an invalid operation exception,
1970 namely zero for NaNs and the minimal or maximal value respectively
1971 for underflow or overflow.
1972 The *isExact output tells whether the result is exact, in the sense
1973 that converting it back to the original floating point type produces
1974 the original value. This is almost equivalent to result==opOK,
1975 except for negative zeroes.
1978 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1980 roundingMode rounding_mode, bool *isExact) const
1984 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
1987 if (fs == opInvalidOp) {
1988 unsigned int bits, dstPartsCount;
1990 dstPartsCount = partCountForBits(width);
1992 if (category == fcNaN)
1997 bits = width - isSigned;
1999 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2000 if (sign && isSigned)
2001 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2007 /* Convert an unsigned integer SRC to a floating point number,
2008 rounding according to ROUNDING_MODE. The sign of the floating
2009 point number is not modified. */
2011 APFloat::convertFromUnsignedParts(const integerPart *src,
2012 unsigned int srcCount,
2013 roundingMode rounding_mode)
2015 unsigned int omsb, precision, dstCount;
2017 lostFraction lost_fraction;
2019 assertArithmeticOK(*semantics);
2020 category = fcNormal;
2021 omsb = APInt::tcMSB(src, srcCount) + 1;
2022 dst = significandParts();
2023 dstCount = partCount();
2024 precision = semantics->precision;
2026 /* We want the most significant PRECISON bits of SRC. There may not
2027 be that many; extract what we can. */
2028 if (precision <= omsb) {
2029 exponent = omsb - 1;
2030 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2032 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2034 exponent = precision - 1;
2035 lost_fraction = lfExactlyZero;
2036 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2039 return normalize(rounding_mode, lost_fraction);
2043 APFloat::convertFromAPInt(const APInt &Val,
2045 roundingMode rounding_mode)
2047 unsigned int partCount = Val.getNumWords();
2051 if (isSigned && api.isNegative()) {
2056 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2059 /* Convert a two's complement integer SRC to a floating point number,
2060 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2061 integer is signed, in which case it must be sign-extended. */
2063 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2064 unsigned int srcCount,
2066 roundingMode rounding_mode)
2070 assertArithmeticOK(*semantics);
2072 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2075 /* If we're signed and negative negate a copy. */
2077 copy = new integerPart[srcCount];
2078 APInt::tcAssign(copy, src, srcCount);
2079 APInt::tcNegate(copy, srcCount);
2080 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2084 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2090 /* FIXME: should this just take a const APInt reference? */
2092 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2093 unsigned int width, bool isSigned,
2094 roundingMode rounding_mode)
2096 unsigned int partCount = partCountForBits(width);
2097 APInt api = APInt(width, partCount, parts);
2100 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2105 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2109 APFloat::convertFromHexadecimalString(const char *p,
2110 roundingMode rounding_mode)
2112 lostFraction lost_fraction;
2113 integerPart *significand;
2114 unsigned int bitPos, partsCount;
2115 const char *dot, *firstSignificantDigit;
2119 category = fcNormal;
2121 significand = significandParts();
2122 partsCount = partCount();
2123 bitPos = partsCount * integerPartWidth;
2125 /* Skip leading zeroes and any (hexa)decimal point. */
2126 p = skipLeadingZeroesAndAnyDot(p, &dot);
2127 firstSignificantDigit = p;
2130 integerPart hex_value;
2137 hex_value = hexDigitValue(*p);
2138 if(hex_value == -1U) {
2139 lost_fraction = lfExactlyZero;
2145 /* Store the number whilst 4-bit nibbles remain. */
2148 hex_value <<= bitPos % integerPartWidth;
2149 significand[bitPos / integerPartWidth] |= hex_value;
2151 lost_fraction = trailingHexadecimalFraction(p, hex_value);
2152 while(hexDigitValue(*p) != -1U)
2158 /* Hex floats require an exponent but not a hexadecimal point. */
2159 assert(*p == 'p' || *p == 'P');
2161 /* Ignore the exponent if we are zero. */
2162 if(p != firstSignificantDigit) {
2165 /* Implicit hexadecimal point? */
2169 /* Calculate the exponent adjustment implicit in the number of
2170 significant digits. */
2171 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2172 if(expAdjustment < 0)
2174 expAdjustment = expAdjustment * 4 - 1;
2176 /* Adjust for writing the significand starting at the most
2177 significant nibble. */
2178 expAdjustment += semantics->precision;
2179 expAdjustment -= partsCount * integerPartWidth;
2181 /* Adjust for the given exponent. */
2182 exponent = totalExponent(p, expAdjustment);
2185 return normalize(rounding_mode, lost_fraction);
2189 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2190 unsigned sigPartCount, int exp,
2191 roundingMode rounding_mode)
2193 unsigned int parts, pow5PartCount;
2194 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2195 integerPart pow5Parts[maxPowerOfFiveParts];
2198 isNearest = (rounding_mode == rmNearestTiesToEven
2199 || rounding_mode == rmNearestTiesToAway);
2201 parts = partCountForBits(semantics->precision + 11);
2203 /* Calculate pow(5, abs(exp)). */
2204 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2206 for (;; parts *= 2) {
2207 opStatus sigStatus, powStatus;
2208 unsigned int excessPrecision, truncatedBits;
2210 calcSemantics.precision = parts * integerPartWidth - 1;
2211 excessPrecision = calcSemantics.precision - semantics->precision;
2212 truncatedBits = excessPrecision;
2214 APFloat decSig(calcSemantics, fcZero, sign);
2215 APFloat pow5(calcSemantics, fcZero, false);
2217 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2218 rmNearestTiesToEven);
2219 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2220 rmNearestTiesToEven);
2221 /* Add exp, as 10^n = 5^n * 2^n. */
2222 decSig.exponent += exp;
2224 lostFraction calcLostFraction;
2225 integerPart HUerr, HUdistance;
2226 unsigned int powHUerr;
2229 /* multiplySignificand leaves the precision-th bit set to 1. */
2230 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2231 powHUerr = powStatus != opOK;
2233 calcLostFraction = decSig.divideSignificand(pow5);
2234 /* Denormal numbers have less precision. */
2235 if (decSig.exponent < semantics->minExponent) {
2236 excessPrecision += (semantics->minExponent - decSig.exponent);
2237 truncatedBits = excessPrecision;
2238 if (excessPrecision > calcSemantics.precision)
2239 excessPrecision = calcSemantics.precision;
2241 /* Extra half-ulp lost in reciprocal of exponent. */
2242 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2245 /* Both multiplySignificand and divideSignificand return the
2246 result with the integer bit set. */
2247 assert (APInt::tcExtractBit
2248 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2250 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2252 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2253 excessPrecision, isNearest);
2255 /* Are we guaranteed to round correctly if we truncate? */
2256 if (HUdistance >= HUerr) {
2257 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2258 calcSemantics.precision - excessPrecision,
2260 /* Take the exponent of decSig. If we tcExtract-ed less bits
2261 above we must adjust our exponent to compensate for the
2262 implicit right shift. */
2263 exponent = (decSig.exponent + semantics->precision
2264 - (calcSemantics.precision - excessPrecision));
2265 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2268 return normalize(rounding_mode, calcLostFraction);
2274 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2279 /* Scan the text. */
2280 interpretDecimal(p, &D);
2282 /* Handle the quick cases. First the case of no significant digits,
2283 i.e. zero, and then exponents that are obviously too large or too
2284 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2285 definitely overflows if
2287 (exp - 1) * L >= maxExponent
2289 and definitely underflows to zero where
2291 (exp + 1) * L <= minExponent - precision
2293 With integer arithmetic the tightest bounds for L are
2295 93/28 < L < 196/59 [ numerator <= 256 ]
2296 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2299 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2302 } else if ((D.normalizedExponent + 1) * 28738
2303 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2304 /* Underflow to zero and round. */
2306 fs = normalize(rounding_mode, lfLessThanHalf);
2307 } else if ((D.normalizedExponent - 1) * 42039
2308 >= 12655 * semantics->maxExponent) {
2309 /* Overflow and round. */
2310 fs = handleOverflow(rounding_mode);
2312 integerPart *decSignificand;
2313 unsigned int partCount;
2315 /* A tight upper bound on number of bits required to hold an
2316 N-digit decimal integer is N * 196 / 59. Allocate enough space
2317 to hold the full significand, and an extra part required by
2319 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2320 partCount = partCountForBits(1 + 196 * partCount / 59);
2321 decSignificand = new integerPart[partCount + 1];
2324 /* Convert to binary efficiently - we do almost all multiplication
2325 in an integerPart. When this would overflow do we do a single
2326 bignum multiplication, and then revert again to multiplication
2327 in an integerPart. */
2329 integerPart decValue, val, multiplier;
2338 decValue = decDigitValue(*p++);
2340 val = val * 10 + decValue;
2341 /* The maximum number that can be multiplied by ten with any
2342 digit added without overflowing an integerPart. */
2343 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2345 /* Multiply out the current part. */
2346 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2347 partCount, partCount + 1, false);
2349 /* If we used another part (likely but not guaranteed), increase
2351 if (decSignificand[partCount])
2353 } while (p <= D.lastSigDigit);
2355 category = fcNormal;
2356 fs = roundSignificandWithExponent(decSignificand, partCount,
2357 D.exponent, rounding_mode);
2359 delete [] decSignificand;
2366 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2368 assertArithmeticOK(*semantics);
2370 /* Handle a leading minus sign. */
2376 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2377 return convertFromHexadecimalString(p + 2, rounding_mode);
2379 return convertFromDecimalString(p, rounding_mode);
2382 /* Write out a hexadecimal representation of the floating point value
2383 to DST, which must be of sufficient size, in the C99 form
2384 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2385 excluding the terminating NUL.
2387 If UPPERCASE, the output is in upper case, otherwise in lower case.
2389 HEXDIGITS digits appear altogether, rounding the value if
2390 necessary. If HEXDIGITS is 0, the minimal precision to display the
2391 number precisely is used instead. If nothing would appear after
2392 the decimal point it is suppressed.
2394 The decimal exponent is always printed and has at least one digit.
2395 Zero values display an exponent of zero. Infinities and NaNs
2396 appear as "infinity" or "nan" respectively.
2398 The above rules are as specified by C99. There is ambiguity about
2399 what the leading hexadecimal digit should be. This implementation
2400 uses whatever is necessary so that the exponent is displayed as
2401 stored. This implies the exponent will fall within the IEEE format
2402 range, and the leading hexadecimal digit will be 0 (for denormals),
2403 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2404 any other digits zero).
2407 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2408 bool upperCase, roundingMode rounding_mode) const
2412 assertArithmeticOK(*semantics);
2420 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2421 dst += sizeof infinityL - 1;
2425 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2426 dst += sizeof NaNU - 1;
2431 *dst++ = upperCase ? 'X': 'x';
2433 if (hexDigits > 1) {
2435 memset (dst, '0', hexDigits - 1);
2436 dst += hexDigits - 1;
2438 *dst++ = upperCase ? 'P': 'p';
2443 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2449 return static_cast<unsigned int>(dst - p);
2452 /* Does the hard work of outputting the correctly rounded hexadecimal
2453 form of a normal floating point number with the specified number of
2454 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2455 digits necessary to print the value precisely is output. */
2457 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2459 roundingMode rounding_mode) const
2461 unsigned int count, valueBits, shift, partsCount, outputDigits;
2462 const char *hexDigitChars;
2463 const integerPart *significand;
2468 *dst++ = upperCase ? 'X': 'x';
2471 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2473 significand = significandParts();
2474 partsCount = partCount();
2476 /* +3 because the first digit only uses the single integer bit, so
2477 we have 3 virtual zero most-significant-bits. */
2478 valueBits = semantics->precision + 3;
2479 shift = integerPartWidth - valueBits % integerPartWidth;
2481 /* The natural number of digits required ignoring trailing
2482 insignificant zeroes. */
2483 outputDigits = (valueBits - significandLSB () + 3) / 4;
2485 /* hexDigits of zero means use the required number for the
2486 precision. Otherwise, see if we are truncating. If we are,
2487 find out if we need to round away from zero. */
2489 if (hexDigits < outputDigits) {
2490 /* We are dropping non-zero bits, so need to check how to round.
2491 "bits" is the number of dropped bits. */
2493 lostFraction fraction;
2495 bits = valueBits - hexDigits * 4;
2496 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2497 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2499 outputDigits = hexDigits;
2502 /* Write the digits consecutively, and start writing in the location
2503 of the hexadecimal point. We move the most significant digit
2504 left and add the hexadecimal point later. */
2507 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2509 while (outputDigits && count) {
2512 /* Put the most significant integerPartWidth bits in "part". */
2513 if (--count == partsCount)
2514 part = 0; /* An imaginary higher zero part. */
2516 part = significand[count] << shift;
2519 part |= significand[count - 1] >> (integerPartWidth - shift);
2521 /* Convert as much of "part" to hexdigits as we can. */
2522 unsigned int curDigits = integerPartWidth / 4;
2524 if (curDigits > outputDigits)
2525 curDigits = outputDigits;
2526 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2527 outputDigits -= curDigits;
2533 /* Note that hexDigitChars has a trailing '0'. */
2536 *q = hexDigitChars[hexDigitValue (*q) + 1];
2537 } while (*q == '0');
2540 /* Add trailing zeroes. */
2541 memset (dst, '0', outputDigits);
2542 dst += outputDigits;
2545 /* Move the most significant digit to before the point, and if there
2546 is something after the decimal point add it. This must come
2547 after rounding above. */
2554 /* Finally output the exponent. */
2555 *dst++ = upperCase ? 'P': 'p';
2557 return writeSignedDecimal (dst, exponent);
2560 // For good performance it is desirable for different APFloats
2561 // to produce different integers.
2563 APFloat::getHashValue() const
2565 if (category==fcZero) return sign<<8 | semantics->precision ;
2566 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2567 else if (category==fcNaN) return 1<<10 | semantics->precision;
2569 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2570 const integerPart* p = significandParts();
2571 for (int i=partCount(); i>0; i--, p++)
2572 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2577 // Conversion from APFloat to/from host float/double. It may eventually be
2578 // possible to eliminate these and have everybody deal with APFloats, but that
2579 // will take a while. This approach will not easily extend to long double.
2580 // Current implementation requires integerPartWidth==64, which is correct at
2581 // the moment but could be made more general.
2583 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2584 // the actual IEEE respresentations. We compensate for that here.
2587 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2589 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2590 assert (partCount()==2);
2592 uint64_t myexponent, mysignificand;
2594 if (category==fcNormal) {
2595 myexponent = exponent+16383; //bias
2596 mysignificand = significandParts()[0];
2597 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2598 myexponent = 0; // denormal
2599 } else if (category==fcZero) {
2602 } else if (category==fcInfinity) {
2603 myexponent = 0x7fff;
2604 mysignificand = 0x8000000000000000ULL;
2606 assert(category == fcNaN && "Unknown category");
2607 myexponent = 0x7fff;
2608 mysignificand = significandParts()[0];
2612 words[0] = mysignificand;
2613 words[1] = ((uint64_t)(sign & 1) << 15) |
2614 (myexponent & 0x7fffLL);
2615 return APInt(80, 2, words);
2619 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2621 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2622 assert (partCount()==2);
2624 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2626 if (category==fcNormal) {
2627 myexponent = exponent + 1023; //bias
2628 myexponent2 = exponent2 + 1023;
2629 mysignificand = significandParts()[0];
2630 mysignificand2 = significandParts()[1];
2631 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2632 myexponent = 0; // denormal
2633 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2634 myexponent2 = 0; // denormal
2635 } else if (category==fcZero) {
2640 } else if (category==fcInfinity) {
2646 assert(category == fcNaN && "Unknown category");
2648 mysignificand = significandParts()[0];
2649 myexponent2 = exponent2;
2650 mysignificand2 = significandParts()[1];
2654 words[0] = ((uint64_t)(sign & 1) << 63) |
2655 ((myexponent & 0x7ff) << 52) |
2656 (mysignificand & 0xfffffffffffffLL);
2657 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2658 ((myexponent2 & 0x7ff) << 52) |
2659 (mysignificand2 & 0xfffffffffffffLL);
2660 return APInt(128, 2, words);
2664 APFloat::convertDoubleAPFloatToAPInt() const
2666 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2667 assert (partCount()==1);
2669 uint64_t myexponent, mysignificand;
2671 if (category==fcNormal) {
2672 myexponent = exponent+1023; //bias
2673 mysignificand = *significandParts();
2674 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2675 myexponent = 0; // denormal
2676 } else if (category==fcZero) {
2679 } else if (category==fcInfinity) {
2683 assert(category == fcNaN && "Unknown category!");
2685 mysignificand = *significandParts();
2688 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2689 ((myexponent & 0x7ff) << 52) |
2690 (mysignificand & 0xfffffffffffffLL))));
2694 APFloat::convertFloatAPFloatToAPInt() const
2696 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2697 assert (partCount()==1);
2699 uint32_t myexponent, mysignificand;
2701 if (category==fcNormal) {
2702 myexponent = exponent+127; //bias
2703 mysignificand = (uint32_t)*significandParts();
2704 if (myexponent == 1 && !(mysignificand & 0x800000))
2705 myexponent = 0; // denormal
2706 } else if (category==fcZero) {
2709 } else if (category==fcInfinity) {
2713 assert(category == fcNaN && "Unknown category!");
2715 mysignificand = (uint32_t)*significandParts();
2718 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2719 (mysignificand & 0x7fffff)));
2722 // This function creates an APInt that is just a bit map of the floating
2723 // point constant as it would appear in memory. It is not a conversion,
2724 // and treating the result as a normal integer is unlikely to be useful.
2727 APFloat::bitcastToAPInt() const
2729 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2730 return convertFloatAPFloatToAPInt();
2732 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2733 return convertDoubleAPFloatToAPInt();
2735 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2736 return convertPPCDoubleDoubleAPFloatToAPInt();
2738 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2740 return convertF80LongDoubleAPFloatToAPInt();
2744 APFloat::convertToFloat() const
2746 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2747 APInt api = bitcastToAPInt();
2748 return api.bitsToFloat();
2752 APFloat::convertToDouble() const
2754 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2755 APInt api = bitcastToAPInt();
2756 return api.bitsToDouble();
2759 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2760 /// does not support these bit patterns:
2761 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2762 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2763 /// exponent = 0, integer bit 1 ("pseudodenormal")
2764 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2765 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2767 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2769 assert(api.getBitWidth()==80);
2770 uint64_t i1 = api.getRawData()[0];
2771 uint64_t i2 = api.getRawData()[1];
2772 uint64_t myexponent = (i2 & 0x7fff);
2773 uint64_t mysignificand = i1;
2775 initialize(&APFloat::x87DoubleExtended);
2776 assert(partCount()==2);
2778 sign = static_cast<unsigned int>(i2>>15);
2779 if (myexponent==0 && mysignificand==0) {
2780 // exponent, significand meaningless
2782 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2783 // exponent, significand meaningless
2784 category = fcInfinity;
2785 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2786 // exponent meaningless
2788 significandParts()[0] = mysignificand;
2789 significandParts()[1] = 0;
2791 category = fcNormal;
2792 exponent = myexponent - 16383;
2793 significandParts()[0] = mysignificand;
2794 significandParts()[1] = 0;
2795 if (myexponent==0) // denormal
2801 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2803 assert(api.getBitWidth()==128);
2804 uint64_t i1 = api.getRawData()[0];
2805 uint64_t i2 = api.getRawData()[1];
2806 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2807 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2808 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2809 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2811 initialize(&APFloat::PPCDoubleDouble);
2812 assert(partCount()==2);
2814 sign = static_cast<unsigned int>(i1>>63);
2815 sign2 = static_cast<unsigned int>(i2>>63);
2816 if (myexponent==0 && mysignificand==0) {
2817 // exponent, significand meaningless
2818 // exponent2 and significand2 are required to be 0; we don't check
2820 } else if (myexponent==0x7ff && mysignificand==0) {
2821 // exponent, significand meaningless
2822 // exponent2 and significand2 are required to be 0; we don't check
2823 category = fcInfinity;
2824 } else if (myexponent==0x7ff && mysignificand!=0) {
2825 // exponent meaningless. So is the whole second word, but keep it
2828 exponent2 = myexponent2;
2829 significandParts()[0] = mysignificand;
2830 significandParts()[1] = mysignificand2;
2832 category = fcNormal;
2833 // Note there is no category2; the second word is treated as if it is
2834 // fcNormal, although it might be something else considered by itself.
2835 exponent = myexponent - 1023;
2836 exponent2 = myexponent2 - 1023;
2837 significandParts()[0] = mysignificand;
2838 significandParts()[1] = mysignificand2;
2839 if (myexponent==0) // denormal
2842 significandParts()[0] |= 0x10000000000000LL; // integer bit
2846 significandParts()[1] |= 0x10000000000000LL; // integer bit
2851 APFloat::initFromDoubleAPInt(const APInt &api)
2853 assert(api.getBitWidth()==64);
2854 uint64_t i = *api.getRawData();
2855 uint64_t myexponent = (i >> 52) & 0x7ff;
2856 uint64_t mysignificand = i & 0xfffffffffffffLL;
2858 initialize(&APFloat::IEEEdouble);
2859 assert(partCount()==1);
2861 sign = static_cast<unsigned int>(i>>63);
2862 if (myexponent==0 && mysignificand==0) {
2863 // exponent, significand meaningless
2865 } else if (myexponent==0x7ff && mysignificand==0) {
2866 // exponent, significand meaningless
2867 category = fcInfinity;
2868 } else if (myexponent==0x7ff && mysignificand!=0) {
2869 // exponent meaningless
2871 *significandParts() = mysignificand;
2873 category = fcNormal;
2874 exponent = myexponent - 1023;
2875 *significandParts() = mysignificand;
2876 if (myexponent==0) // denormal
2879 *significandParts() |= 0x10000000000000LL; // integer bit
2884 APFloat::initFromFloatAPInt(const APInt & api)
2886 assert(api.getBitWidth()==32);
2887 uint32_t i = (uint32_t)*api.getRawData();
2888 uint32_t myexponent = (i >> 23) & 0xff;
2889 uint32_t mysignificand = i & 0x7fffff;
2891 initialize(&APFloat::IEEEsingle);
2892 assert(partCount()==1);
2895 if (myexponent==0 && mysignificand==0) {
2896 // exponent, significand meaningless
2898 } else if (myexponent==0xff && mysignificand==0) {
2899 // exponent, significand meaningless
2900 category = fcInfinity;
2901 } else if (myexponent==0xff && mysignificand!=0) {
2902 // sign, exponent, significand meaningless
2904 *significandParts() = mysignificand;
2906 category = fcNormal;
2907 exponent = myexponent - 127; //bias
2908 *significandParts() = mysignificand;
2909 if (myexponent==0) // denormal
2912 *significandParts() |= 0x800000; // integer bit
2916 /// Treat api as containing the bits of a floating point number. Currently
2917 /// we infer the floating point type from the size of the APInt. The
2918 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2919 /// when the size is anything else).
2921 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2923 if (api.getBitWidth() == 32)
2924 return initFromFloatAPInt(api);
2925 else if (api.getBitWidth()==64)
2926 return initFromDoubleAPInt(api);
2927 else if (api.getBitWidth()==80)
2928 return initFromF80LongDoubleAPInt(api);
2929 else if (api.getBitWidth()==128 && !isIEEE)
2930 return initFromPPCDoubleDoubleAPInt(api);
2935 APFloat::APFloat(const APInt& api, bool isIEEE)
2937 initFromAPInt(api, isIEEE);
2940 APFloat::APFloat(float f)
2942 APInt api = APInt(32, 0);
2943 initFromAPInt(api.floatToBits(f));
2946 APFloat::APFloat(double d)
2948 APInt api = APInt(64, 0);
2949 initFromAPInt(api.doubleToBits(d));