1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/MathExtras.h"
24 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
26 /* Assumed in hexadecimal significand parsing, and conversion to
27 hexadecimal strings. */
28 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
29 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
33 /* Represents floating point arithmetic semantics. */
35 /* The largest E such that 2^E is representable; this matches the
36 definition of IEEE 754. */
37 exponent_t maxExponent;
39 /* The smallest E such that 2^E is a normalized number; this
40 matches the definition of IEEE 754. */
41 exponent_t minExponent;
43 /* Number of bits in the significand. This includes the integer
45 unsigned int precision;
47 /* True if arithmetic is supported. */
48 unsigned int arithmeticOK;
51 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
52 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
53 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
54 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
55 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
57 // The PowerPC format consists of two doubles. It does not map cleanly
58 // onto the usual format above. For now only storage of constants of
59 // this type is supported, no arithmetic.
60 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
62 /* A tight upper bound on number of parts required to hold the value
65 power * 815 / (351 * integerPartWidth) + 1
67 However, whilst the result may require only this many parts,
68 because we are multiplying two values to get it, the
69 multiplication may require an extra part with the excess part
70 being zero (consider the trivial case of 1 * 1, tcFullMultiply
71 requires two parts to hold the single-part result). So we add an
72 extra one to guarantee enough space whilst multiplying. */
73 const unsigned int maxExponent = 16383;
74 const unsigned int maxPrecision = 113;
75 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
76 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
77 / (351 * integerPartWidth));
80 /* A bunch of private, handy routines. */
82 static inline unsigned int
83 partCountForBits(unsigned int bits)
85 return ((bits) + integerPartWidth - 1) / integerPartWidth;
88 /* Returns 0U-9U. Return values >= 10U are not digits. */
89 static inline unsigned int
90 decDigitValue(unsigned int c)
96 hexDigitValue(unsigned int c)
116 assertArithmeticOK(const llvm::fltSemantics &semantics) {
117 assert(semantics.arithmeticOK
118 && "Compile-time arithmetic does not support these semantics");
121 /* Return the value of a decimal exponent of the form
124 If the exponent overflows, returns a large exponent with the
127 readExponent(StringRef::iterator begin, StringRef::iterator end)
130 unsigned int absExponent;
131 const unsigned int overlargeExponent = 24000; /* FIXME. */
132 StringRef::iterator p = begin;
134 assert(p != end && "Exponent has no digits");
136 isNegative = (*p == '-');
137 if (*p == '-' || *p == '+') {
139 assert(p != end && "Exponent has no digits");
142 absExponent = decDigitValue(*p++);
143 assert(absExponent < 10U && "Invalid character in exponent");
145 for (; p != end; ++p) {
148 value = decDigitValue(*p);
149 assert(value < 10U && "Invalid character in exponent");
151 value += absExponent * 10;
152 if (absExponent >= overlargeExponent) {
153 absExponent = overlargeExponent;
159 assert(p == end && "Invalid exponent in exponent");
162 return -(int) absExponent;
164 return (int) absExponent;
167 /* This is ugly and needs cleaning up, but I don't immediately see
168 how whilst remaining safe. */
170 totalExponent(StringRef::iterator p, StringRef::iterator end,
171 int exponentAdjustment)
173 int unsignedExponent;
174 bool negative, overflow;
177 /* Move past the exponent letter and sign to the digits. */
179 negative = *p == '-';
180 if(*p == '-' || *p == '+')
183 unsignedExponent = 0;
185 for(; p != end; ++p) {
188 value = decDigitValue(*p);
189 assert(value < 10U && "Invalid character in exponent");
191 unsignedExponent = unsignedExponent * 10 + value;
192 if(unsignedExponent > 65535)
196 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
200 exponent = unsignedExponent;
202 exponent = -exponent;
203 exponent += exponentAdjustment;
204 if(exponent > 65535 || exponent < -65536)
209 exponent = negative ? -65536: 65535;
214 static StringRef::iterator
215 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
216 StringRef::iterator *dot)
218 StringRef::iterator p = begin;
220 while(*p == '0' && p != end)
226 assert(end - begin != 1 && "String cannot be just a dot");
228 while(*p == '0' && p != end)
235 /* Given a normal decimal floating point number of the form
239 where the decimal point and exponent are optional, fill out the
240 structure D. Exponent is appropriate if the significand is
241 treated as an integer, and normalizedExponent if the significand
242 is taken to have the decimal point after a single leading
245 If the value is zero, V->firstSigDigit points to a non-digit, and
246 the return exponent is zero.
249 const char *firstSigDigit;
250 const char *lastSigDigit;
252 int normalizedExponent;
256 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
259 StringRef::iterator dot = end;
260 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
262 D->firstSigDigit = p;
264 D->normalizedExponent = 0;
266 for (; p != end; ++p) {
268 assert(dot == end && "Multiple dots in float");
273 if (decDigitValue(*p) >= 10U)
278 assert((*p == 'e' || *p == 'E') && "Invalid character in digit string");
280 /* p points to the first non-digit in the string */
281 if (*p == 'e' || *p == 'E') {
282 D->exponent = readExponent(p + 1, end);
285 /* Implied decimal point? */
290 /* If number is all zeroes accept any exponent. */
291 if (p != D->firstSigDigit) {
292 /* Drop insignificant trailing zeroes. */
297 while (p != begin && *p == '0');
298 while (p != begin && *p == '.');
301 /* Adjust the exponents for any decimal point. */
302 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
303 D->normalizedExponent = (D->exponent +
304 static_cast<exponent_t>((p - D->firstSigDigit)
305 - (dot > D->firstSigDigit && dot < p)));
311 /* Return the trailing fraction of a hexadecimal number.
312 DIGITVALUE is the first hex digit of the fraction, P points to
315 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
316 unsigned int digitValue)
318 unsigned int hexDigit;
320 /* If the first trailing digit isn't 0 or 8 we can work out the
321 fraction immediately. */
323 return lfMoreThanHalf;
324 else if(digitValue < 8 && digitValue > 0)
325 return lfLessThanHalf;
327 /* Otherwise we need to find the first non-zero digit. */
331 assert(p != end && "Invalid trailing hexadecimal fraction!");
333 hexDigit = hexDigitValue(*p);
335 /* If we ran off the end it is exactly zero or one-half, otherwise
338 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
340 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
343 /* Return the fraction lost were a bignum truncated losing the least
344 significant BITS bits. */
346 lostFractionThroughTruncation(const integerPart *parts,
347 unsigned int partCount,
352 lsb = APInt::tcLSB(parts, partCount);
354 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
356 return lfExactlyZero;
358 return lfExactlyHalf;
359 if(bits <= partCount * integerPartWidth
360 && APInt::tcExtractBit(parts, bits - 1))
361 return lfMoreThanHalf;
363 return lfLessThanHalf;
366 /* Shift DST right BITS bits noting lost fraction. */
368 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
370 lostFraction lost_fraction;
372 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
374 APInt::tcShiftRight(dst, parts, bits);
376 return lost_fraction;
379 /* Combine the effect of two lost fractions. */
381 combineLostFractions(lostFraction moreSignificant,
382 lostFraction lessSignificant)
384 if(lessSignificant != lfExactlyZero) {
385 if(moreSignificant == lfExactlyZero)
386 moreSignificant = lfLessThanHalf;
387 else if(moreSignificant == lfExactlyHalf)
388 moreSignificant = lfMoreThanHalf;
391 return moreSignificant;
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUERR1 and HUERR2 half-ulps, is strictly less
   than the returned value.

   INEXACTMULTIPLY is true if the multiplication itself lost precision.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  /* Spell out the bool-to-integer conversion rather than relying on
     implicit promotion inside the arithmetic below.  */
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;
  const unsigned int inputError = HUerr1 + HUerr2;

  /* Exact inputs: only the multiplication itself can contribute.  */
  if (inputError == 0)
    return multiplyError * 2;  /* <= inexactMultiply half-ulps.  */

  return multiplyError + 2 * inputError;
}
412 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
413 when the least significant BITS are truncated. BITS cannot be
416 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
418 unsigned int count, partBits;
419 integerPart part, boundary;
424 count = bits / integerPartWidth;
425 partBits = bits % integerPartWidth + 1;
427 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
430 boundary = (integerPart) 1 << (partBits - 1);
435 if (part - boundary <= boundary - part)
436 return part - boundary;
438 return boundary - part;
441 if (part == boundary) {
444 return ~(integerPart) 0; /* A lot. */
447 } else if (part == boundary - 1) {
450 return ~(integerPart) 0; /* A lot. */
455 return ~(integerPart) 0; /* A lot. */
458 /* Place pow(5, power) in DST, and return the number of parts used.
459 DST must be at least one part larger than size of the answer. */
461 powerOf5(integerPart *dst, unsigned int power)
463 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
465 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
466 pow5s[0] = 78125 * 5;
468 unsigned int partsCount[16] = { 1 };
469 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
471 assert(power <= maxExponent);
476 *p1 = firstEightPowers[power & 7];
482 for (unsigned int n = 0; power; power >>= 1, n++) {
487 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
489 pc = partsCount[n - 1];
490 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
492 if (pow5[pc - 1] == 0)
500 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
502 if (p2[result - 1] == 0)
505 /* Now result is in p1 with partsCount parts and p2 is scratch
507 tmp = p1, p1 = p2, p2 = tmp;
514 APInt::tcAssign(dst, p1, result);
519 /* Zero at the end to avoid modular arithmetic when adding one; used
520 when rounding up during hexadecimal output. */
521 static const char hexDigitsLower[] = "0123456789abcdef0";
522 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
523 static const char infinityL[] = "infinity";
524 static const char infinityU[] = "INFINITY";
525 static const char NaNL[] = "nan";
526 static const char NaNU[] = "NAN";
528 /* Write out an integerPart in hexadecimal, starting with the most
529 significant nibble. Write out exactly COUNT hexdigits, return
532 partAsHex (char *dst, integerPart part, unsigned int count,
533 const char *hexDigitChars)
535 unsigned int result = count;
537 assert (count != 0 && count <= integerPartWidth / 4);
539 part >>= (integerPartWidth - 4 * count);
541 dst[count] = hexDigitChars[part & 0xf];
548 /* Write out an unsigned decimal integer. */
550 writeUnsignedDecimal (char *dst, unsigned int n)
566 /* Write out a signed decimal integer. */
568 writeSignedDecimal (char *dst, int value)
572 dst = writeUnsignedDecimal(dst, -(unsigned) value);
574 dst = writeUnsignedDecimal(dst, value);
581 APFloat::initialize(const fltSemantics *ourSemantics)
585 semantics = ourSemantics;
588 significand.parts = new integerPart[count];
592 APFloat::freeSignificand()
595 delete [] significand.parts;
599 APFloat::assign(const APFloat &rhs)
601 assert(semantics == rhs.semantics);
604 category = rhs.category;
605 exponent = rhs.exponent;
607 exponent2 = rhs.exponent2;
608 if(category == fcNormal || category == fcNaN)
609 copySignificand(rhs);
613 APFloat::copySignificand(const APFloat &rhs)
615 assert(category == fcNormal || category == fcNaN);
616 assert(rhs.partCount() >= partCount());
618 APInt::tcAssign(significandParts(), rhs.significandParts(),
622 /* Make this number a NaN, with an arbitrary but deterministic value
623 for the significand. If double or longer, this is a signalling NaN,
624 which may not be ideal. If float, this is QNaN(0). */
626 APFloat::makeNaN(unsigned type)
629 // FIXME: Add double and long double support for QNaN(0).
630 if (semantics->precision == 24 && semantics->maxExponent == 127) {
632 type &= ~0x80000000U;
635 APInt::tcSet(significandParts(), type, partCount());
639 APFloat::operator=(const APFloat &rhs)
642 if(semantics != rhs.semantics) {
644 initialize(rhs.semantics);
653 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
656 if (semantics != rhs.semantics ||
657 category != rhs.category ||
660 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
663 if (category==fcZero || category==fcInfinity)
665 else if (category==fcNormal && exponent!=rhs.exponent)
667 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
668 exponent2!=rhs.exponent2)
672 const integerPart* p=significandParts();
673 const integerPart* q=rhs.significandParts();
674 for (; i>0; i--, p++, q++) {
682 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
684 assertArithmeticOK(ourSemantics);
685 initialize(&ourSemantics);
688 exponent = ourSemantics.precision - 1;
689 significandParts()[0] = value;
690 normalize(rmNearestTiesToEven, lfExactlyZero);
693 APFloat::APFloat(const fltSemantics &ourSemantics,
694 fltCategory ourCategory, bool negative, unsigned type)
696 assertArithmeticOK(ourSemantics);
697 initialize(&ourSemantics);
698 category = ourCategory;
700 if (category == fcNormal)
702 else if (ourCategory == fcNaN)
706 APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
708 assertArithmeticOK(ourSemantics);
709 initialize(&ourSemantics);
710 convertFromString(text, rmNearestTiesToEven);
713 APFloat::APFloat(const APFloat &rhs)
715 initialize(rhs.semantics);
724 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
725 void APFloat::Profile(FoldingSetNodeID& ID) const {
726 ID.Add(bitcastToAPInt());
730 APFloat::partCount() const
732 return partCountForBits(semantics->precision + 1);
736 APFloat::semanticsPrecision(const fltSemantics &semantics)
738 return semantics.precision;
742 APFloat::significandParts() const
744 return const_cast<APFloat *>(this)->significandParts();
748 APFloat::significandParts()
750 assert(category == fcNormal || category == fcNaN);
753 return significand.parts;
755 return &significand.part;
759 APFloat::zeroSignificand()
762 APInt::tcSet(significandParts(), 0, partCount());
765 /* Increment an fcNormal floating point number's significand. */
767 APFloat::incrementSignificand()
771 carry = APInt::tcIncrement(significandParts(), partCount());
773 /* Our callers should never cause us to overflow. */
777 /* Add the significand of the RHS. Returns the carry flag. */
779 APFloat::addSignificand(const APFloat &rhs)
783 parts = significandParts();
785 assert(semantics == rhs.semantics);
786 assert(exponent == rhs.exponent);
788 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
791 /* Subtract the significand of the RHS with a borrow flag. Returns
794 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
798 parts = significandParts();
800 assert(semantics == rhs.semantics);
801 assert(exponent == rhs.exponent);
803 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
807 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
808 on to the full-precision result of the multiplication. Returns the
811 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
813 unsigned int omsb; // One, not zero, based MSB.
814 unsigned int partsCount, newPartsCount, precision;
815 integerPart *lhsSignificand;
816 integerPart scratch[4];
817 integerPart *fullSignificand;
818 lostFraction lost_fraction;
821 assert(semantics == rhs.semantics);
823 precision = semantics->precision;
824 newPartsCount = partCountForBits(precision * 2);
826 if(newPartsCount > 4)
827 fullSignificand = new integerPart[newPartsCount];
829 fullSignificand = scratch;
831 lhsSignificand = significandParts();
832 partsCount = partCount();
834 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
835 rhs.significandParts(), partsCount, partsCount);
837 lost_fraction = lfExactlyZero;
838 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
839 exponent += rhs.exponent;
842 Significand savedSignificand = significand;
843 const fltSemantics *savedSemantics = semantics;
844 fltSemantics extendedSemantics;
846 unsigned int extendedPrecision;
848 /* Normalize our MSB. */
849 extendedPrecision = precision + precision - 1;
850 if(omsb != extendedPrecision)
852 APInt::tcShiftLeft(fullSignificand, newPartsCount,
853 extendedPrecision - omsb);
854 exponent -= extendedPrecision - omsb;
857 /* Create new semantics. */
858 extendedSemantics = *semantics;
859 extendedSemantics.precision = extendedPrecision;
861 if(newPartsCount == 1)
862 significand.part = fullSignificand[0];
864 significand.parts = fullSignificand;
865 semantics = &extendedSemantics;
867 APFloat extendedAddend(*addend);
868 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
869 assert(status == opOK);
870 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
872 /* Restore our state. */
873 if(newPartsCount == 1)
874 fullSignificand[0] = significand.part;
875 significand = savedSignificand;
876 semantics = savedSemantics;
878 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
881 exponent -= (precision - 1);
883 if(omsb > precision) {
884 unsigned int bits, significantParts;
887 bits = omsb - precision;
888 significantParts = partCountForBits(omsb);
889 lf = shiftRight(fullSignificand, significantParts, bits);
890 lost_fraction = combineLostFractions(lf, lost_fraction);
894 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
896 if(newPartsCount > 4)
897 delete [] fullSignificand;
899 return lost_fraction;
902 /* Divide our significand by that of the RHS; returns the lost fraction. */
904 APFloat::divideSignificand(const APFloat &rhs)
906 unsigned int bit, i, partsCount;
907 const integerPart *rhsSignificand;
908 integerPart *lhsSignificand, *dividend, *divisor;
909 integerPart scratch[4];
910 lostFraction lost_fraction;
912 assert(semantics == rhs.semantics);
914 lhsSignificand = significandParts();
915 rhsSignificand = rhs.significandParts();
916 partsCount = partCount();
919 dividend = new integerPart[partsCount * 2];
923 divisor = dividend + partsCount;
925 /* Copy the dividend and divisor as they will be modified in-place. */
926 for(i = 0; i < partsCount; i++) {
927 dividend[i] = lhsSignificand[i];
928 divisor[i] = rhsSignificand[i];
929 lhsSignificand[i] = 0;
932 exponent -= rhs.exponent;
934 unsigned int precision = semantics->precision;
936 /* Normalize the divisor. */
937 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
940 APInt::tcShiftLeft(divisor, partsCount, bit);
943 /* Normalize the dividend. */
944 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
947 APInt::tcShiftLeft(dividend, partsCount, bit);
950 /* Ensure the dividend >= divisor initially for the loop below.
951 Incidentally, this means that the division loop below is
952 guaranteed to set the integer bit to one. */
953 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
955 APInt::tcShiftLeft(dividend, partsCount, 1);
956 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
960 for(bit = precision; bit; bit -= 1) {
961 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
962 APInt::tcSubtract(dividend, divisor, 0, partsCount);
963 APInt::tcSetBit(lhsSignificand, bit - 1);
966 APInt::tcShiftLeft(dividend, partsCount, 1);
969 /* Figure out the lost fraction. */
970 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
973 lost_fraction = lfMoreThanHalf;
975 lost_fraction = lfExactlyHalf;
976 else if(APInt::tcIsZero(dividend, partsCount))
977 lost_fraction = lfExactlyZero;
979 lost_fraction = lfLessThanHalf;
984 return lost_fraction;
988 APFloat::significandMSB() const
990 return APInt::tcMSB(significandParts(), partCount());
994 APFloat::significandLSB() const
996 return APInt::tcLSB(significandParts(), partCount());
999 /* Note that a zero result is NOT normalized to fcZero. */
1001 APFloat::shiftSignificandRight(unsigned int bits)
1003 /* Our exponent should not overflow. */
1004 assert((exponent_t) (exponent + bits) >= exponent);
1008 return shiftRight(significandParts(), partCount(), bits);
1011 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1013 APFloat::shiftSignificandLeft(unsigned int bits)
1015 assert(bits < semantics->precision);
1018 unsigned int partsCount = partCount();
1020 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1023 assert(!APInt::tcIsZero(significandParts(), partsCount));
1028 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1032 assert(semantics == rhs.semantics);
1033 assert(category == fcNormal);
1034 assert(rhs.category == fcNormal);
1036 compare = exponent - rhs.exponent;
1038 /* If exponents are equal, do an unsigned bignum comparison of the
1041 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1045 return cmpGreaterThan;
1046 else if(compare < 0)
1052 /* Handle overflow. Sign is preserved. We either become infinity or
1053 the largest finite number. */
1055 APFloat::handleOverflow(roundingMode rounding_mode)
1058 if(rounding_mode == rmNearestTiesToEven
1059 || rounding_mode == rmNearestTiesToAway
1060 || (rounding_mode == rmTowardPositive && !sign)
1061 || (rounding_mode == rmTowardNegative && sign))
1063 category = fcInfinity;
1064 return (opStatus) (opOverflow | opInexact);
1067 /* Otherwise we become the largest finite number. */
1068 category = fcNormal;
1069 exponent = semantics->maxExponent;
1070 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1071 semantics->precision);
1076 /* Returns TRUE if, when truncating the current number, with BIT the
1077 new LSB, with the given lost fraction and rounding mode, the result
1078 would need to be rounded away from zero (i.e., by increasing the
1079 significand). This routine must work for fcZero of both signs, and
1080 fcNormal numbers. */
1082 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1083 lostFraction lost_fraction,
1084 unsigned int bit) const
1086 /* NaNs and infinities should not have lost fractions. */
1087 assert(category == fcNormal || category == fcZero);
1089 /* Current callers never pass this so we don't handle it. */
1090 assert(lost_fraction != lfExactlyZero);
1092 switch (rounding_mode) {
1094 llvm_unreachable(0);
1096 case rmNearestTiesToAway:
1097 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1099 case rmNearestTiesToEven:
1100 if(lost_fraction == lfMoreThanHalf)
1103 /* Our zeroes don't have a significand to test. */
1104 if(lost_fraction == lfExactlyHalf && category != fcZero)
1105 return APInt::tcExtractBit(significandParts(), bit);
1112 case rmTowardPositive:
1113 return sign == false;
1115 case rmTowardNegative:
1116 return sign == true;
1121 APFloat::normalize(roundingMode rounding_mode,
1122 lostFraction lost_fraction)
1124 unsigned int omsb; /* One, not zero, based MSB. */
1127 if(category != fcNormal)
1130 /* Before rounding normalize the exponent of fcNormal numbers. */
1131 omsb = significandMSB() + 1;
1134 /* OMSB is numbered from 1. We want to place it in the integer
1135 bit numbered PRECISION if possible, with a compensating change in
1137 exponentChange = omsb - semantics->precision;
1139 /* If the resulting exponent is too high, overflow according to
1140 the rounding mode. */
1141 if(exponent + exponentChange > semantics->maxExponent)
1142 return handleOverflow(rounding_mode);
1144 /* Subnormal numbers have exponent minExponent, and their MSB
1145 is forced based on that. */
1146 if(exponent + exponentChange < semantics->minExponent)
1147 exponentChange = semantics->minExponent - exponent;
1149 /* Shifting left is easy as we don't lose precision. */
1150 if(exponentChange < 0) {
1151 assert(lost_fraction == lfExactlyZero);
1153 shiftSignificandLeft(-exponentChange);
1158 if(exponentChange > 0) {
1161 /* Shift right and capture any new lost fraction. */
1162 lf = shiftSignificandRight(exponentChange);
1164 lost_fraction = combineLostFractions(lf, lost_fraction);
1166 /* Keep OMSB up-to-date. */
1167 if(omsb > (unsigned) exponentChange)
1168 omsb -= exponentChange;
1174 /* Now round the number according to rounding_mode given the lost
1177 /* As specified in IEEE 754, since we do not trap we do not report
1178 underflow for exact results. */
1179 if(lost_fraction == lfExactlyZero) {
1180 /* Canonicalize zeroes. */
1187 /* Increment the significand if we're rounding away from zero. */
1188 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1190 exponent = semantics->minExponent;
1192 incrementSignificand();
1193 omsb = significandMSB() + 1;
1195 /* Did the significand increment overflow? */
1196 if(omsb == (unsigned) semantics->precision + 1) {
1197 /* Renormalize by incrementing the exponent and shifting our
1198 significand right one. However if we already have the
1199 maximum exponent we overflow to infinity. */
1200 if(exponent == semantics->maxExponent) {
1201 category = fcInfinity;
1203 return (opStatus) (opOverflow | opInexact);
1206 shiftSignificandRight(1);
1212 /* The normal case - we were and are not denormal, and any
1213 significand increment above didn't overflow. */
1214 if(omsb == semantics->precision)
1217 /* We have a non-zero denormal. */
1218 assert(omsb < semantics->precision);
1220 /* Canonicalize zeroes. */
1224 /* The fcZero case is a denormal that underflowed to zero. */
1225 return (opStatus) (opUnderflow | opInexact);
1229 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1231 switch (convolve(category, rhs.category)) {
1233 llvm_unreachable(0);
1235 case convolve(fcNaN, fcZero):
1236 case convolve(fcNaN, fcNormal):
1237 case convolve(fcNaN, fcInfinity):
1238 case convolve(fcNaN, fcNaN):
1239 case convolve(fcNormal, fcZero):
1240 case convolve(fcInfinity, fcNormal):
1241 case convolve(fcInfinity, fcZero):
1244 case convolve(fcZero, fcNaN):
1245 case convolve(fcNormal, fcNaN):
1246 case convolve(fcInfinity, fcNaN):
1248 copySignificand(rhs);
1251 case convolve(fcNormal, fcInfinity):
1252 case convolve(fcZero, fcInfinity):
1253 category = fcInfinity;
1254 sign = rhs.sign ^ subtract;
1257 case convolve(fcZero, fcNormal):
1259 sign = rhs.sign ^ subtract;
1262 case convolve(fcZero, fcZero):
1263 /* Sign depends on rounding mode; handled by caller. */
1266 case convolve(fcInfinity, fcInfinity):
1267 /* Differently signed infinities can only be validly
1269 if(((sign ^ rhs.sign)!=0) != subtract) {
1276 case convolve(fcNormal, fcNormal):
1281 /* Add or subtract two normal numbers. */
1283 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1286 lostFraction lost_fraction;
1289 /* Determine if the operation on the absolute values is effectively
1290 an addition or subtraction. */
1291 subtract ^= (sign ^ rhs.sign) ? true : false;
1293 /* Are we bigger exponent-wise than the RHS? */
1294 bits = exponent - rhs.exponent;
1296 /* Subtraction is more subtle than one might naively expect. */
1298 APFloat temp_rhs(rhs);
1302 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1303 lost_fraction = lfExactlyZero;
1304 } else if (bits > 0) {
1305 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1306 shiftSignificandLeft(1);
1309 lost_fraction = shiftSignificandRight(-bits - 1);
1310 temp_rhs.shiftSignificandLeft(1);
1315 carry = temp_rhs.subtractSignificand
1316 (*this, lost_fraction != lfExactlyZero);
1317 copySignificand(temp_rhs);
1320 carry = subtractSignificand
1321 (temp_rhs, lost_fraction != lfExactlyZero);
1324 /* Invert the lost fraction - it was on the RHS and
1326 if(lost_fraction == lfLessThanHalf)
1327 lost_fraction = lfMoreThanHalf;
1328 else if(lost_fraction == lfMoreThanHalf)
1329 lost_fraction = lfLessThanHalf;
1331 /* The code above is intended to ensure that no borrow is
1336 APFloat temp_rhs(rhs);
1338 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1339 carry = addSignificand(temp_rhs);
1341 lost_fraction = shiftSignificandRight(-bits);
1342 carry = addSignificand(rhs);
1345 /* We have a guard bit; generating a carry cannot happen. */
1349 return lost_fraction;
1353 APFloat::multiplySpecials(const APFloat &rhs)
1355 switch (convolve(category, rhs.category)) {
1357 llvm_unreachable(0);
1359 case convolve(fcNaN, fcZero):
1360 case convolve(fcNaN, fcNormal):
1361 case convolve(fcNaN, fcInfinity):
1362 case convolve(fcNaN, fcNaN):
1365 case convolve(fcZero, fcNaN):
1366 case convolve(fcNormal, fcNaN):
1367 case convolve(fcInfinity, fcNaN):
1369 copySignificand(rhs);
1372 case convolve(fcNormal, fcInfinity):
1373 case convolve(fcInfinity, fcNormal):
1374 case convolve(fcInfinity, fcInfinity):
1375 category = fcInfinity;
1378 case convolve(fcZero, fcNormal):
1379 case convolve(fcNormal, fcZero):
1380 case convolve(fcZero, fcZero):
1384 case convolve(fcZero, fcInfinity):
1385 case convolve(fcInfinity, fcZero):
1389 case convolve(fcNormal, fcNormal):
1395 APFloat::divideSpecials(const APFloat &rhs)
1397 switch (convolve(category, rhs.category)) {
1399 llvm_unreachable(0);
1401 case convolve(fcNaN, fcZero):
1402 case convolve(fcNaN, fcNormal):
1403 case convolve(fcNaN, fcInfinity):
1404 case convolve(fcNaN, fcNaN):
1405 case convolve(fcInfinity, fcZero):
1406 case convolve(fcInfinity, fcNormal):
1407 case convolve(fcZero, fcInfinity):
1408 case convolve(fcZero, fcNormal):
1411 case convolve(fcZero, fcNaN):
1412 case convolve(fcNormal, fcNaN):
1413 case convolve(fcInfinity, fcNaN):
1415 copySignificand(rhs);
1418 case convolve(fcNormal, fcInfinity):
1422 case convolve(fcNormal, fcZero):
1423 category = fcInfinity;
1426 case convolve(fcInfinity, fcInfinity):
1427 case convolve(fcZero, fcZero):
1431 case convolve(fcNormal, fcNormal):
1437 APFloat::modSpecials(const APFloat &rhs)
1439 switch (convolve(category, rhs.category)) {
1441 llvm_unreachable(0);
1443 case convolve(fcNaN, fcZero):
1444 case convolve(fcNaN, fcNormal):
1445 case convolve(fcNaN, fcInfinity):
1446 case convolve(fcNaN, fcNaN):
1447 case convolve(fcZero, fcInfinity):
1448 case convolve(fcZero, fcNormal):
1449 case convolve(fcNormal, fcInfinity):
1452 case convolve(fcZero, fcNaN):
1453 case convolve(fcNormal, fcNaN):
1454 case convolve(fcInfinity, fcNaN):
1456 copySignificand(rhs);
1459 case convolve(fcNormal, fcZero):
1460 case convolve(fcInfinity, fcZero):
1461 case convolve(fcInfinity, fcNormal):
1462 case convolve(fcInfinity, fcInfinity):
1463 case convolve(fcZero, fcZero):
1467 case convolve(fcNormal, fcNormal):
// NOTE(review): the statement(s) of changeSign() are not visible in this
// view; presumably it inverts the sign of this value - confirm.
1474 APFloat::changeSign()
1476 /* Look mummy, this one's easy. */
// NOTE(review): the statement(s) of clearSign() are not visible in this
// view; presumably it makes this value non-negative - confirm.
1481 APFloat::clearSign()
1483 /* So is this one. */
// NOTE(review): the body of copySign() is not visible in this view;
// presumably it takes over the sign of RHS - confirm.
1488 APFloat::copySign(const APFloat &rhs)
1494 /* Normalized addition or subtraction. */
// Common implementation behind add() and subtract(), which pass false and
// true respectively as the final argument.  Category-only cases are tried
// first through addOrSubtractSpecials(); otherwise the significands are
// combined and the result is normalized using ROUNDING_MODE.
1496 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1501 assertArithmeticOK(*semantics);
1503 fs = addOrSubtractSpecials(rhs, subtract);
1505 /* This return code means it was not a simple case. */
// opDivByZero is used here as an in-band marker from addOrSubtractSpecials()
// meaning that real significand arithmetic is still required.
1506 if(fs == opDivByZero) {
1507 lostFraction lost_fraction;
1509 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1510 fs = normalize(rounding_mode, lost_fraction);
1512 /* Can only be zero if we lost no fraction. */
1513 assert(category != fcZero || lost_fraction == lfExactlyZero);
1516 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1517 positive zero unless rounding to minus infinity, except that
1518 adding two like-signed zeroes gives that zero. */
1519 if(category == fcZero) {
// The sign is recomputed unless rhs is itself a zero whose effective sign
// (after accounting for subtraction) matches ours.
1520 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1521 sign = (rounding_mode == rmTowardNegative);
1527 /* Normalized addition. */
// Forwards to addOrSubtract() with the subtract flag set to false and
// returns its status.
1529 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1531 return addOrSubtract(rhs, rounding_mode, false);
1534 /* Normalized subtraction. */
// Forwards to addOrSubtract() with the subtract flag set to true and
// returns its status.
1536 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1538 return addOrSubtract(rhs, rounding_mode, true);
1541 /* Normalized multiply. */
// multiplySpecials() settles the operand categories first.  The significand
// product is formed only if the category is still fcNormal afterwards; the
// result is then normalized with ROUNDING_MODE and opInexact is added to the
// status whenever a fraction was lost.
1543 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1547 assertArithmeticOK(*semantics);
1549 fs = multiplySpecials(rhs);
1551 if(category == fcNormal) {
// The second argument 0 means there is no addend (fusedMultiplyAdd passes
// one in the same position).
1552 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1553 fs = normalize(rounding_mode, lost_fraction);
1554 if(lost_fraction != lfExactlyZero)
1555 fs = (opStatus) (fs | opInexact);
1561 /* Normalized divide. */
// divideSpecials() settles the operand categories first.  The significand
// quotient is formed only if the category is still fcNormal afterwards; the
// result is then normalized with ROUNDING_MODE and opInexact is added to the
// status whenever a fraction was lost.
1563 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1567 assertArithmeticOK(*semantics);
1569 fs = divideSpecials(rhs);
1571 if(category == fcNormal) {
1572 lostFraction lost_fraction = divideSignificand(rhs);
1573 fs = normalize(rounding_mode, lost_fraction);
1574 if(lost_fraction != lfExactlyZero)
1575 fs = (opStatus) (fs | opInexact);
1581 /* Normalized remainder. This is not currently correct in all cases. */
1583 APFloat::remainder(const APFloat &rhs)
1587 unsigned int origSign = sign;
1589 assertArithmeticOK(*semantics);
1590 fs = V.divide(rhs, rmNearestTiesToEven);
1591 if (fs == opDivByZero)
1594 int parts = partCount();
1595 integerPart *x = new integerPart[parts];
1597 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1598 rmNearestTiesToEven, &ignored);
1599 if (fs==opInvalidOp)
1602 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1603 rmNearestTiesToEven);
1604 assert(fs==opOK); // should always work
1606 fs = V.multiply(rhs, rmNearestTiesToEven);
1607 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1609 fs = subtract(V, rmNearestTiesToEven);
1610 assert(fs==opOK || fs==opInexact); // likewise
1613 sign = origSign; // IEEE754 requires this
1618 /* Normalized llvm frem (C fmod).
1619 This is not currently correct in all cases. */
1621 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1624 assertArithmeticOK(*semantics);
1625 fs = modSpecials(rhs);
1627 if (category == fcNormal && rhs.category == fcNormal) {
1629 unsigned int origSign = sign;
1631 fs = V.divide(rhs, rmNearestTiesToEven);
1632 if (fs == opDivByZero)
1635 int parts = partCount();
1636 integerPart *x = new integerPart[parts];
1638 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1639 rmTowardZero, &ignored);
1640 if (fs==opInvalidOp)
1643 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1644 rmNearestTiesToEven);
1645 assert(fs==opOK); // should always work
1647 fs = V.multiply(rhs, rounding_mode);
1648 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1650 fs = subtract(V, rounding_mode);
1651 assert(fs==opOK || fs==opInexact); // likewise
1654 sign = origSign; // IEEE754 requires this
1660 /* Normalized fused-multiply-add. */
// Operates on this value, MULTIPLICAND and ADDEND.  When all three are
// normal, product and sum are formed by a single multiplySignificand() call
// and normalized once with ROUNDING_MODE.  Otherwise the work is split into
// multiplySpecials() followed by an ordinary addOrSubtract() of ADDEND.
1662 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1663 const APFloat &addend,
1664 roundingMode rounding_mode)
1668 assertArithmeticOK(*semantics);
1670 /* Post-multiplication sign, before addition. */
1671 sign ^= multiplicand.sign;
1673 /* If and only if all arguments are normal do we need to do an
1674 extended-precision calculation. */
1675 if(category == fcNormal
1676 && multiplicand.category == fcNormal
1677 && addend.category == fcNormal) {
1678 lostFraction lost_fraction;
1680 lost_fraction = multiplySignificand(multiplicand, &addend);
1681 fs = normalize(rounding_mode, lost_fraction);
1682 if(lost_fraction != lfExactlyZero)
1683 fs = (opStatus) (fs | opInexact);
1685 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1686 positive zero unless rounding to minus infinity, except that
1687 adding two like-signed zeroes gives that zero. */
1688 if(category == fcZero && sign != addend.sign)
1689 sign = (rounding_mode == rmTowardNegative);
1691 fs = multiplySpecials(multiplicand);
1693 /* FS can only be opOK or opInvalidOp. There is no more work
1694 to do in the latter case. The IEEE-754R standard says it is
1695 implementation-defined in this case whether, if ADDEND is a
1696 quiet NaN, we raise invalid op; this implementation does so.
1698 If we need to do the addition we can do so with normal
1701 fs = addOrSubtract(addend, rounding_mode, false);
1707 /* Comparison requires normalized numbers. */
// Compares this value with RHS.  Both operands must use the same semantics
// (asserted).  Any NaN operand yields cmpUnordered.  Some return statements
// of the remaining cases are not visible in this view.
1709 APFloat::compare(const APFloat &rhs) const
1713 assertArithmeticOK(*semantics);
1714 assert(semantics == rhs.semantics);
1716 switch (convolve(category, rhs.category)) {
1718 llvm_unreachable(0);
1720 case convolve(fcNaN, fcZero):
1721 case convolve(fcNaN, fcNormal):
1722 case convolve(fcNaN, fcInfinity):
1723 case convolve(fcNaN, fcNaN):
1724 case convolve(fcZero, fcNaN):
1725 case convolve(fcNormal, fcNaN):
1726 case convolve(fcInfinity, fcNaN):
1727 return cmpUnordered;
// Left operand of a "larger" category than the right one.
// NOTE(review): the condition selecting this return is not visible here.
1729 case convolve(fcInfinity, fcNormal):
1730 case convolve(fcInfinity, fcZero):
1731 case convolve(fcNormal, fcZero):
1735 return cmpGreaterThan;
// Mirror image of the group above.
// NOTE(review): the condition selecting this return is not visible here.
1737 case convolve(fcNormal, fcInfinity):
1738 case convolve(fcZero, fcInfinity):
1739 case convolve(fcZero, fcNormal):
1741 return cmpGreaterThan;
1745 case convolve(fcInfinity, fcInfinity):
1746 if(sign == rhs.sign)
1751 return cmpGreaterThan;
1753 case convolve(fcZero, fcZero):
1756 case convolve(fcNormal, fcNormal):
1760 /* Two normal numbers. Do they have the same sign? */
1761 if(sign != rhs.sign) {
1763 result = cmpLessThan;
1765 result = cmpGreaterThan;
1767 /* Compare absolute values; invert result if negative. */
1768 result = compareAbsoluteValue(rhs);
// Swap less-than and greater-than; other results are left as they are.
1771 if(result == cmpLessThan)
1772 result = cmpGreaterThan;
1773 else if(result == cmpGreaterThan)
1774 result = cmpLessThan;
1781 /// APFloat::convert - convert a value of one floating point type to another.
1782 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1783 /// records whether the transformation lost information, i.e. whether
1784 /// converting the result back to the original type will produce the
1785 /// original value (this is almost the same as return value==opOK, but there
1786 /// are edge cases where this is not so).
///
/// Both the current and the target semantics must support arithmetic
/// (asserted).  For normal values the significand is re-normalized with
/// rounding_mode and *losesInfo is set from the normalize() status; NaN
/// payloads are shifted without normalization.
1789 APFloat::convert(const fltSemantics &toSemantics,
1790 roundingMode rounding_mode, bool *losesInfo)
1792 lostFraction lostFraction;
1793 unsigned int newPartCount, oldPartCount;
1796 assertArithmeticOK(*semantics);
1797 assertArithmeticOK(toSemantics);
1798 lostFraction = lfExactlyZero;
1799 newPartCount = partCountForBits(toSemantics.precision + 1);
1800 oldPartCount = partCount();
1802 /* Handle storage complications. If our new form is wider,
1803 re-allocate our bit pattern into wider storage. If it is
1804 narrower, we ignore the excess parts, but if narrowing to a
1805 single part we need to free the old storage.
1806 Be careful not to reference significandParts for zeroes
1807 and infinities, since it aborts. */
1808 if (newPartCount > oldPartCount) {
1809 integerPart *newParts;
1810 newParts = new integerPart[newPartCount];
1811 APInt::tcSet(newParts, 0, newPartCount);
1812 if (category==fcNormal || category==fcNaN)
1813 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1815 significand.parts = newParts;
1816 } else if (newPartCount < oldPartCount) {
1817 /* Capture any lost fraction through truncation of parts so we get
1818 correct rounding whilst normalizing. */
1819 if (category==fcNormal)
1820 lostFraction = lostFractionThroughTruncation
1821 (significandParts(), oldPartCount, toSemantics.precision);
1822 if (newPartCount == 1) {
1823 integerPart newPart = 0;
1824 if (category==fcNormal || category==fcNaN)
1825 newPart = significandParts()[0];
1827 significand.part = newPart;
1831 if(category == fcNormal) {
1832 /* Re-interpret our bit-pattern. */
1833 exponent += toSemantics.precision - semantics->precision;
1834 semantics = &toSemantics;
1835 fs = normalize(rounding_mode, lostFraction);
1836 *losesInfo = (fs != opOK);
1837 } else if (category == fcNaN) {
// Positive shift: target has more precision; negative: target has less.
1838 int shift = toSemantics.precision - semantics->precision;
1839 // Do this now so significandParts gets the right answer
1840 const fltSemantics *oldSemantics = semantics;
1841 semantics = &toSemantics;
1843 // No normalization here, just truncate
1845 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1846 else if (shift < 0) {
1847 unsigned ushift = -shift;
1848 // Figure out if we are losing information. This happens
1849 // if we are shifting out something other than 0s, or if the x87 long
1850 // double input did not have its integer bit set (pseudo-NaN), or if the
1851 // x87 long double input did not have its QNan bit set (because the x87
1852 // hardware sets this bit when converting a lower-precision NaN to
1853 // x87 long double).
1854 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
// 0x8000... tests bit 63 and 0x4000... tests bit 62 of the lowest part;
// per the comment above these are the integer bit and the QNaN bit.
1856 if (oldSemantics == &APFloat::x87DoubleExtended &&
1857 (!(*significandParts() & 0x8000000000000000ULL) ||
1858 !(*significandParts() & 0x4000000000000000ULL)))
1860 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1862 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1863 // does not give you back the same bits. This is dubious, and we
1864 // don't currently do it. You're really supposed to get
1865 // an invalid operation signal at runtime, but nobody does that.
1868 semantics = &toSemantics;
1876 /* Convert a floating point number to an integer according to the
1877 rounding mode. If the rounded integer value is out of range this
1878 returns an invalid operation exception and the contents of the
1879 destination parts are unspecified. If the rounded value is in
1880 range but the floating point number is not the exact integer, the C
1881 standard doesn't require an inexact exception to be raised. IEEE
1882 854 does require it so we do that.
1884 Note that for conversions to integer type the C standard requires
1885 round-to-zero to always be used. */
// PARTS is the destination buffer and WIDTH its size in bits; the number of
// parts written is partCountForBits(width).  The conversion runs in three
// steps (truncate, round, range-check) as marked in the body.
// NOTE(review): the assignments to *isExact are not visible in this view.
1887 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1889 roundingMode rounding_mode,
1890 bool *isExact) const
1892 lostFraction lost_fraction;
1893 const integerPart *src;
1894 unsigned int dstPartsCount, truncatedBits;
1896 assertArithmeticOK(*semantics);
1900 /* Handle the three special cases first. */
1901 if(category == fcInfinity || category == fcNaN)
1904 dstPartsCount = partCountForBits(width);
1906 if(category == fcZero) {
1907 APInt::tcSet(parts, 0, dstPartsCount);
1908 // Negative zero can't be represented as an int.
1913 src = significandParts();
1915 /* Step 1: place our absolute value, with any fraction truncated, in
1918 /* Our absolute value is less than one; truncate everything. */
1919 APInt::tcSet(parts, 0, dstPartsCount);
1920 /* For exponent -1 the integer bit represents .5, look at that.
1921 For smaller exponents leftmost truncated bit is 0. */
1922 truncatedBits = semantics->precision -1U - exponent;
1924 /* We want the most significant (exponent + 1) bits; the rest are
1926 unsigned int bits = exponent + 1U;
1928 /* Hopelessly large in magnitude? */
1932 if (bits < semantics->precision) {
1933 /* We truncate (semantics->precision - bits) bits. */
1934 truncatedBits = semantics->precision - bits;
1935 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1937 /* We want at least as many bits as are available. */
1938 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1939 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1944 /* Step 2: work out any lost fraction, and increment the absolute
1945 value if we would round away from zero. */
1946 if (truncatedBits) {
1947 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1949 if (lost_fraction != lfExactlyZero
1950 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1951 if (APInt::tcIncrement(parts, dstPartsCount))
1952 return opInvalidOp; /* Overflow. */
1955 lost_fraction = lfExactlyZero;
1958 /* Step 3: check if we fit in the destination. */
// omsb is the 1-based position of the highest set bit of the magnitude.
1959 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1963 /* Negative numbers cannot be represented as unsigned. */
1967 /* It takes omsb bits to represent the unsigned integer value.
1968 We lose a bit for the sign, but care is needed as the
1969 maximally negative integer is a special case. */
1970 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1973 /* This case can happen because of rounding. */
1978 APInt::tcNegate (parts, dstPartsCount);
1980 if (omsb >= width + !isSigned)
1984 if (lost_fraction == lfExactlyZero) {
1991 /* Same as convertToSignExtendedInteger, except we provide
1992 deterministic values in case of an invalid operation exception,
1993 namely zero for NaNs and the minimal or maximal value respectively
1994 for underflow or overflow.
1995 The *isExact output tells whether the result is exact, in the sense
1996 that converting it back to the original floating point type produces
1997 the original value. This is almost equivalent to result==opOK,
1998 except for negative zeroes.
// Implementation note: the real work is delegated to
// convertToSignExtendedInteger(); only when that reports opInvalidOp is
// PARTS overwritten here with the saturated value.
2001 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2003 roundingMode rounding_mode, bool *isExact) const
2007 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2010 if (fs == opInvalidOp) {
2011 unsigned int bits, dstPartsCount;
2013 dstPartsCount = partCountForBits(width);
2015 if (category == fcNaN)
// One bit is reserved for the sign when the destination is signed.
2020 bits = width - isSigned;
2022 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
// Negative overflow into a signed destination: shift the ones up so that
// only the top bit of the WIDTH-bit value remains set.
2023 if (sign && isSigned)
2024 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2030 /* Convert an unsigned integer SRC to a floating point number,
2031 rounding according to ROUNDING_MODE. The sign of the floating
2032 point number is not modified. */
// SRC holds SRCCOUNT parts.  The category is set to fcNormal up front and
// the status returned is the one produced by normalize().
2034 APFloat::convertFromUnsignedParts(const integerPart *src,
2035 unsigned int srcCount,
2036 roundingMode rounding_mode)
2038 unsigned int omsb, precision, dstCount;
2040 lostFraction lost_fraction;
2042 assertArithmeticOK(*semantics);
2043 category = fcNormal;
// omsb: 1-based position of the most significant set bit of SRC.
2044 omsb = APInt::tcMSB(src, srcCount) + 1;
2045 dst = significandParts();
2046 dstCount = partCount();
2047 precision = semantics->precision;
2049 /* We want the most significant PRECISION bits of SRC. There may not
2050 be that many; extract what we can. */
2051 if (precision <= omsb) {
2052 exponent = omsb - 1;
2053 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2055 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2057 exponent = precision - 1;
2058 lost_fraction = lfExactlyZero;
2059 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2062 return normalize(rounding_mode, lost_fraction);
// Converts the integer VAL to floating point with ROUNDING_MODE by handing
// its raw words to convertFromUnsignedParts().
// NOTE(review): the declaration of the local `api` and the body of the
// negative branch are not visible in this view.
2066 APFloat::convertFromAPInt(const APInt &Val,
2068 roundingMode rounding_mode)
2070 unsigned int partCount = Val.getNumWords();
2074 if (isSigned && api.isNegative()) {
2079 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2082 /* Convert a two's complement integer SRC to a floating point number,
2083 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2084 integer is signed, in which case it must be sign-extended. */
// The sign test looks at the top bit of the last part, i.e. bit
// srcCount * integerPartWidth - 1.  Negative inputs are negated in a
// temporary heap copy so that SRC itself is never modified.
2086 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2087 unsigned int srcCount,
2089 roundingMode rounding_mode)
2093 assertArithmeticOK(*semantics);
2095 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2098 /* If we're signed and negative negate a copy. */
2100 copy = new integerPart[srcCount];
2101 APInt::tcAssign(copy, src, srcCount);
2102 APInt::tcNegate(copy, srcCount);
2103 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
// Non-negative (or unsigned) input: convert SRC directly.
2107 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2113 /* FIXME: should this just take a const APInt reference? */
// Converts the WIDTH-bit integer stored in PARTS.  Unlike the sign-extended
// variant, the sign test looks at bit WIDTH - 1 rather than at the top bit
// of the last part.
// NOTE(review): the body of the negative branch is not visible in this view.
2115 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2116 unsigned int width, bool isSigned,
2117 roundingMode rounding_mode)
2119 unsigned int partCount = partCountForBits(width);
2120 APInt api = APInt(width, partCount, parts);
2123 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2128 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
// Parses hexadecimal significand digits followed by a mandatory 'p' or 'P'
// exponent from S into this value and returns the normalize() status.
// convertFromString() strips any leading '-' and the "0x" prefix before
// calling this.  Digits are stored from the most significant nibble of the
// significand downwards; digits that no longer fit only contribute to the
// lost fraction.
2132 APFloat::convertFromHexadecimalString(const StringRef &s,
2133 roundingMode rounding_mode)
2135 lostFraction lost_fraction;
2136 integerPart *significand;
2137 unsigned int bitPos, partsCount;
2138 StringRef::iterator dot, firstSignificantDigit;
2142 category = fcNormal;
2144 significand = significandParts();
2145 partsCount = partCount();
// bitPos starts one past the top bit of the significand storage.
2146 bitPos = partsCount * integerPartWidth;
2148 /* Skip leading zeroes and any (hexa)decimal point. */
2149 StringRef::iterator p = skipLeadingZeroesAndAnyDot(s.begin(), s.end(), &dot);
2150 firstSignificantDigit = p;
2152 for(; p != s.end();) {
2153 integerPart hex_value;
2160 hex_value = hexDigitValue(*p);
// hexDigitValue() yields -1U for a character that is not a hex digit.
2161 if(hex_value == -1U) {
2162 lost_fraction = lfExactlyZero;
2171 /* Store the number whilst 4-bit nibbles remain. */
2174 hex_value <<= bitPos % integerPartWidth;
2175 significand[bitPos / integerPartWidth] |= hex_value;
2177 lost_fraction = trailingHexadecimalFraction(p, s.end(), hex_value);
2178 while(p != s.end() && hexDigitValue(*p) != -1U)
2185 /* Hex floats require an exponent but not a hexadecimal point. */
2186 assert(p != s.end() && (*p == 'p' || *p == 'P') &&
2187 "Hex strings require an exponent");
2189 /* Ignore the exponent if we are zero. */
2190 if(p != firstSignificantDigit) {
2193 /* Implicit hexadecimal point? */
2197 /* Calculate the exponent adjustment implicit in the number of
2198 significant digits. */
2199 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2200 if(expAdjustment < 0)
// Four bits per hex digit, minus one for the position of the integer bit.
2202 expAdjustment = expAdjustment * 4 - 1;
2204 /* Adjust for writing the significand starting at the most
2205 significant nibble. */
2206 expAdjustment += semantics->precision;
2207 expAdjustment -= partsCount * integerPartWidth;
2209 /* Adjust for the given exponent. */
2210 exponent = totalExponent(p, s.end(), expAdjustment);
2213 return normalize(rounding_mode, lost_fraction);
// Sets this value to the integer in DECSIGPARTS (SIGPARTCOUNT parts) scaled
// by 10^EXP, rounded with ROUNDING_MODE.  10^EXP is handled as 5^|EXP|
// (multiplied or divided in an extended-precision scratch format) combined
// with 2^EXP (added to the exponent).  The scratch precision is doubled on
// every loop iteration until the half-ulp error bound shows that truncating
// the scratch result rounds correctly.
2217 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2218 unsigned sigPartCount, int exp,
2219 roundingMode rounding_mode)
2221 unsigned int parts, pow5PartCount;
// Scratch semantics; the precision field is filled in per iteration below.
2222 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2223 integerPart pow5Parts[maxPowerOfFiveParts];
2226 isNearest = (rounding_mode == rmNearestTiesToEven
2227 || rounding_mode == rmNearestTiesToAway);
2229 parts = partCountForBits(semantics->precision + 11);
2231 /* Calculate pow(5, abs(exp)). */
2232 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2234 for (;; parts *= 2) {
2235 opStatus sigStatus, powStatus;
2236 unsigned int excessPrecision, truncatedBits;
2238 calcSemantics.precision = parts * integerPartWidth - 1;
2239 excessPrecision = calcSemantics.precision - semantics->precision;
2240 truncatedBits = excessPrecision;
2242 APFloat decSig(calcSemantics, fcZero, sign);
2243 APFloat pow5(calcSemantics, fcZero, false);
2245 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2246 rmNearestTiesToEven);
2247 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2248 rmNearestTiesToEven);
2249 /* Add exp, as 10^n = 5^n * 2^n. */
2250 decSig.exponent += exp;
2252 lostFraction calcLostFraction;
// HUerr / HUdistance are measured in half-ulps (HU).
2253 integerPart HUerr, HUdistance;
2254 unsigned int powHUerr;
2257 /* multiplySignificand leaves the precision-th bit set to 1. */
2258 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2259 powHUerr = powStatus != opOK;
2261 calcLostFraction = decSig.divideSignificand(pow5);
2262 /* Denormal numbers have less precision. */
2263 if (decSig.exponent < semantics->minExponent) {
2264 excessPrecision += (semantics->minExponent - decSig.exponent);
2265 truncatedBits = excessPrecision;
2266 if (excessPrecision > calcSemantics.precision)
2267 excessPrecision = calcSemantics.precision;
2269 /* Extra half-ulp lost in reciprocal of exponent. */
2270 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2273 /* Both multiplySignificand and divideSignificand return the
2274 result with the integer bit set. */
2275 assert (APInt::tcExtractBit
2276 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2278 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2280 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2281 excessPrecision, isNearest);
2283 /* Are we guaranteed to round correctly if we truncate? */
2284 if (HUdistance >= HUerr) {
2285 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2286 calcSemantics.precision - excessPrecision,
2288 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2289 above we must adjust our exponent to compensate for the
2290 implicit right shift. */
2291 exponent = (decSig.exponent + semantics->precision
2292 - (calcSemantics.precision - excessPrecision));
2293 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2296 return normalize(rounding_mode, calcLostFraction);
// Parses the decimal text STR into this value using ROUNDING_MODE.
// interpretDecimal() scans the text into D; obviously tiny or huge exponents
// are short-circuited, and everything else is accumulated into a temporary
// bignum and handed to roundSignificandWithExponent().
2302 APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode)
2307 /* Scan the text. */
2308 StringRef::iterator p = str.begin();
2309 interpretDecimal(p, str.end(), &D);
2311 /* Handle the quick cases. First the case of no significant digits,
2312 i.e. zero, and then exponents that are obviously too large or too
2313 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2314 definitely overflows if
2316 (exp - 1) * L >= maxExponent
2318 and definitely underflows to zero where
2320 (exp + 1) * L <= minExponent - precision
2322 With integer arithmetic the tightest bounds for L are
2324 93/28 < L < 196/59 [ numerator <= 256 ]
2325 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2328 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2331 } else if ((D.normalizedExponent + 1) * 28738
2332 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2333 /* Underflow to zero and round. */
2335 fs = normalize(rounding_mode, lfLessThanHalf);
2336 } else if ((D.normalizedExponent - 1) * 42039
2337 >= 12655 * semantics->maxExponent) {
2338 /* Overflow and round. */
2339 fs = handleOverflow(rounding_mode);
2341 integerPart *decSignificand;
2342 unsigned int partCount;
2344 /* A tight upper bound on number of bits required to hold an
2345 N-digit decimal integer is N * 196 / 59. Allocate enough space
2346 to hold the full significand, and an extra part required by
2348 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2349 partCount = partCountForBits(1 + 196 * partCount / 59);
2350 decSignificand = new integerPart[partCount + 1];
2353 /* Convert to binary efficiently - we do almost all multiplication
2354 in an integerPart. When this would overflow we do a single
2355 bignum multiplication, and then revert again to multiplication
2356 in an integerPart. */
2358 integerPart decValue, val, multiplier;
2366 if (p == str.end()) {
2370 decValue = decDigitValue(*p++);
2371 assert(decValue < 10U && "Invalid character in digit string");
2373 val = val * 10 + decValue;
2374 /* The maximum number that can be multiplied by ten with any
2375 digit added without overflowing an integerPart. */
2376 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2378 /* Multiply out the current part. */
2379 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2380 partCount, partCount + 1, false);
2382 /* If we used another part (likely but not guaranteed), increase
2384 if (decSignificand[partCount])
2386 } while (p <= D.lastSigDigit);
2388 category = fcNormal;
2389 fs = roundSignificandWithExponent(decSignificand, partCount,
2390 D.exponent, rounding_mode);
2392 delete [] decSignificand;
// Entry point for text-to-float conversion.  STR must be non-empty
// (asserted).  A leading '-' is recognised, then the text is routed to the
// hexadecimal parser when it starts with "0x" or "0X" and to the decimal
// parser otherwise.  The sub-parsers receive STR with the sign (and, for
// hex, the two-character prefix) removed.
2399 APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
2401 assertArithmeticOK(*semantics);
2402 assert(!str.empty() && "Invalid string length");
2404 /* Handle a leading minus sign. */
2405 StringRef::iterator p = str.begin();
2406 size_t slen = str.size();
2407 unsigned isNegative = str.front() == '-';
2412 assert(slen && "String is only a minus!");
2417 if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2418 assert(slen - 2 && "Invalid string");
2419 return convertFromHexadecimalString(str.substr(isNegative + 2),
2423 return convertFromDecimalString(str.substr(isNegative), rounding_mode);
2426 /* Write out a hexadecimal representation of the floating point value
2427 to DST, which must be of sufficient size, in the C99 form
2428 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2429 excluding the terminating NUL.
2431 If UPPERCASE, the output is in upper case, otherwise in lower case.
2433 HEXDIGITS digits appear altogether, rounding the value if
2434 necessary. If HEXDIGITS is 0, the minimal precision to display the
2435 number precisely is used instead. If nothing would appear after
2436 the decimal point it is suppressed.
2438 The decimal exponent is always printed and has at least one digit.
2439 Zero values display an exponent of zero. Infinities and NaNs
2440 appear as "infinity" or "nan" respectively.
2442 The above rules are as specified by C99. There is ambiguity about
2443 what the leading hexadecimal digit should be. This implementation
2444 uses whatever is necessary so that the exponent is displayed as
2445 stored. This implies the exponent will fall within the IEEE format
2446 range, and the leading hexadecimal digit will be 0 (for denormals),
2447 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2448 any other digits zero).
// Implementation note: infinities, NaNs and zeroes are written directly
// here; normal values are delegated to convertNormalToHexString().  The
// count returned is the distance DST advanced from its starting position.
2451 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2452 bool upperCase, roundingMode rounding_mode) const
2456 assertArithmeticOK(*semantics);
// NOTE(review): the copy length uses sizeof infinityU while the advance
// uses sizeof infinityL; this relies on both literals having equal length.
2464 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2465 dst += sizeof infinityL - 1;
2469 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2470 dst += sizeof NaNU - 1;
2475 *dst++ = upperCase ? 'X': 'x';
// Zero with more than one requested digit: pad with hexDigits - 1 zeroes.
2477 if (hexDigits > 1) {
2479 memset (dst, '0', hexDigits - 1);
2480 dst += hexDigits - 1;
2482 *dst++ = upperCase ? 'P': 'p';
2487 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2493 return static_cast<unsigned int>(dst - p);
2496 /* Does the hard work of outputting the correctly rounded hexadecimal
2497 form of a normal floating point number with the specified number of
2498 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2499 digits necessary to print the value precisely is output. */
// Writes into DST and returns whatever writeSignedDecimal() returns after
// the exponent has been appended.
2501 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2503 roundingMode rounding_mode) const
2505 unsigned int count, valueBits, shift, partsCount, outputDigits;
2506 const char *hexDigitChars;
2507 const integerPart *significand;
2512 *dst++ = upperCase ? 'X': 'x';
2515 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2517 significand = significandParts();
2518 partsCount = partCount();
2520 /* +3 because the first digit only uses the single integer bit, so
2521 we have 3 virtual zero most-significant-bits. */
2522 valueBits = semantics->precision + 3;
2523 shift = integerPartWidth - valueBits % integerPartWidth;
2525 /* The natural number of digits required ignoring trailing
2526 insignificant zeroes. */
2527 outputDigits = (valueBits - significandLSB () + 3) / 4;
2529 /* hexDigits of zero means use the required number for the
2530 precision. Otherwise, see if we are truncating. If we are,
2531 find out if we need to round away from zero. */
2533 if (hexDigits < outputDigits) {
2534 /* We are dropping non-zero bits, so need to check how to round.
2535 "bits" is the number of dropped bits. */
2537 lostFraction fraction;
2539 bits = valueBits - hexDigits * 4;
2540 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2541 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2543 outputDigits = hexDigits;
2546 /* Write the digits consecutively, and start writing in the location
2547 of the hexadecimal point. We move the most significant digit
2548 left and add the hexadecimal point later. */
// Number of integerPart-sized chunks needed to cover valueBits (rounded up).
2551 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2553 while (outputDigits && count) {
2556 /* Put the most significant integerPartWidth bits in "part". */
2557 if (--count == partsCount)
2558 part = 0; /* An imaginary higher zero part. */
2560 part = significand[count] << shift;
2563 part |= significand[count - 1] >> (integerPartWidth - shift);
2565 /* Convert as much of "part" to hexdigits as we can. */
2566 unsigned int curDigits = integerPartWidth / 4;
2568 if (curDigits > outputDigits)
2569 curDigits = outputDigits;
2570 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2571 outputDigits -= curDigits;
2577 /* Note that hexDigitChars has a trailing '0'. */
// Increment the digit at q; a result of '0' means it wrapped and the carry
// must continue (the loop step that moves q is not visible in this view).
2580 *q = hexDigitChars[hexDigitValue (*q) + 1];
2581 } while (*q == '0');
2584 /* Add trailing zeroes. */
2585 memset (dst, '0', outputDigits);
2586 dst += outputDigits;
2589 /* Move the most significant digit to before the point, and if there
2590 is something after the decimal point add it. This must come
2591 after rounding above. */
2598 /* Finally output the exponent. */
2599 *dst++ = upperCase ? 'P': 'p';
2601 return writeSignedDecimal (dst, exponent);
2604 // For good performance it is desirable for different APFloats
2605 // to produce different integers.
2607 APFloat::getHashValue() const
2609 if (category==fcZero) return sign<<8 | semantics->precision ;
2610 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2611 else if (category==fcNaN) return 1<<10 | semantics->precision;
2613 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2614 const integerPart* p = significandParts();
2615 for (int i=partCount(); i>0; i--, p++)
2616 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2621 // Conversion from APFloat to/from host float/double. It may eventually be
2622 // possible to eliminate these and have everybody deal with APFloats, but that
2623 // will take a while. This approach will not easily extend to long double.
2624 // Current implementation requires integerPartWidth==64, which is correct at
2625 // the moment but could be made more general.
2627 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2628 // the actual IEEE representations. We compensate for that here.
// Packs an x87 double-extended value into an 80-bit APInt made of two
// 64-bit words: words[0] is the 64-bit significand, words[1] carries the
// sign in bit 15 and the biased exponent in the low 15 bits.
2631 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2633 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2634 assert (partCount()==2);
2636 uint64_t myexponent, mysignificand;
2638 if (category==fcNormal) {
2639 myexponent = exponent+16383; //bias
2640 mysignificand = significandParts()[0];
// Bit 63 is the explicit integer bit; when it is clear at the minimum
// exponent the value is a denormal and is encoded with exponent field 0.
2641 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2642 myexponent = 0; // denormal
2643 } else if (category==fcZero) {
2646 } else if (category==fcInfinity) {
// Infinity: all-ones exponent with only the integer bit set.
2647 myexponent = 0x7fff;
2648 mysignificand = 0x8000000000000000ULL;
2650 assert(category == fcNaN && "Unknown category");
// NaN: all-ones exponent, significand taken from the stored bits.
2651 myexponent = 0x7fff;
2652 mysignificand = significandParts()[0];
2656 words[0] = mysignificand;
2657 words[1] = ((uint64_t)(sign & 1) << 15) |
2658 (myexponent & 0x7fffLL);
2659 return APInt(80, 2, words);
// Returns the 128-bit PowerPC double-double encoding of this value as an
// APInt built from two words. Each word uses the same layout: sign in bit
// 63, an 11-bit exponent field in bits 52-62 and a 52-bit fraction in bits
// 0-51. words[0] comes from sign/exponent/significand part 0, words[1] from
// sign2/exponent2/significand part 1.
// Asserts PPCDoubleDouble semantics and a two-part significand.
// NOTE(review): the return type, braces, the fcZero and fcInfinity
// assignments, the NaN exponent assignment (line 2691) and the declaration
// of `words` are missing from this view.
2663 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2665 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2666 assert (partCount()==2);
2668 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2670 if (category==fcNormal) {
2671 myexponent = exponent + 1023; //bias
2672 myexponent2 = exponent2 + 1023;
2673 mysignificand = significandParts()[0];
2674 mysignificand2 = significandParts()[1];
// For each half independently: a biased exponent of 1 with the integer bit
// (bit 52) clear is a denormal and is encoded with an exponent field of 0.
2675 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2676 myexponent = 0; // denormal
2677 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2678 myexponent2 = 0; // denormal
2679 } else if (category==fcZero) {
// NOTE(review): the zero-case assignments (lines 2680-2683) are not visible
// in this view.
2684 } else if (category==fcInfinity) {
// NOTE(review): the infinity-case assignments (lines 2685-2688) are not
// visible in this view.
2690 assert(category == fcNaN && "Unknown category");
// NaN: both significand parts are emitted as stored, and exponent2 is used
// for the second word's exponent field as-is, with no bias added.
2692 mysignificand = significandParts()[0];
2693 myexponent2 = exponent2;
2694 mysignificand2 = significandParts()[1];
// The masks drop the integer bit (bit 52) from each significand, since the
// encoding stores only the 52 fraction bits.
2698 words[0] = ((uint64_t)(sign & 1) << 63) |
2699 ((myexponent & 0x7ff) << 52) |
2700 (mysignificand & 0xfffffffffffffLL);
2701 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2702 ((myexponent2 & 0x7ff) << 52) |
2703 (mysignificand2 & 0xfffffffffffffLL);
2704 return APInt(128, 2, words);
// Returns the 64-bit IEEE double encoding of this value as an APInt: sign in
// bit 63, 11-bit biased exponent in bits 52-62, 52-bit fraction in bits 0-51.
// Asserts IEEEdouble semantics and a single-part significand.
// NOTE(review): the return type, braces, the fcZero and fcInfinity
// assignments and the NaN exponent assignment (line 2728) are missing from
// this view.
2708 APFloat::convertDoubleAPFloatToAPInt() const
2710 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2711 assert (partCount()==1);
2713 uint64_t myexponent, mysignificand;
2715 if (category==fcNormal) {
2716 myexponent = exponent+1023; //bias
2717 mysignificand = *significandParts();
// A biased exponent of 1 with the integer bit (bit 52) clear is a denormal,
// which is encoded with an exponent field of 0.
2718 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2719 myexponent = 0; // denormal
2720 } else if (category==fcZero) {
// NOTE(review): the zero-case assignments (lines 2721-2722) are not visible
// in this view.
2723 } else if (category==fcInfinity) {
// NOTE(review): the infinity-case assignments (lines 2724-2725) are not
// visible in this view.
2727 assert(category == fcNaN && "Unknown category!");
// NaN: the stored significand is used as the payload.
2729 mysignificand = *significandParts();
// The fraction mask drops the integer bit (bit 52); it is implicit in the
// encoding.
2732 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2733 ((myexponent & 0x7ff) << 52) |
2734 (mysignificand & 0xfffffffffffffLL))));
// Returns the 32-bit IEEE single encoding of this value as an APInt: sign in
// bit 31, 8-bit biased exponent in bits 23-30, 23-bit fraction in bits 0-22.
// Asserts IEEEsingle semantics and a single-part significand.
// NOTE(review): the return type, braces, the fcZero and fcInfinity
// assignments and the NaN exponent assignment (line 2758) are missing from
// this view.
2738 APFloat::convertFloatAPFloatToAPInt() const
2740 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2741 assert (partCount()==1);
2743 uint32_t myexponent, mysignificand;
2745 if (category==fcNormal) {
2746 myexponent = exponent+127; //bias
// Only the low 32 bits of the significand part are kept.
2747 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with the integer bit (bit 23) clear is a denormal,
// which is encoded with an exponent field of 0.
2748 if (myexponent == 1 && !(mysignificand & 0x800000))
2749 myexponent = 0; // denormal
2750 } else if (category==fcZero) {
// NOTE(review): the zero-case assignments (lines 2751-2752) are not visible
// in this view.
2753 } else if (category==fcInfinity) {
// NOTE(review): the infinity-case assignments (lines 2754-2755) are not
// visible in this view.
2757 assert(category == fcNaN && "Unknown category!");
// NaN: the stored significand is used as the payload.
2759 mysignificand = (uint32_t)*significandParts();
// The fraction mask drops the integer bit (bit 23); it is implicit in the
// encoding.
2762 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2763 (mysignificand & 0x7fffff)));
2766 // This function creates an APInt that is just a bit map of the floating
2767 // point constant as it would appear in memory. It is not a conversion,
2768 // and treating the result as a normal integer is unlikely to be useful.
// Dispatches on the semantics pointer: IEEEsingle, IEEEdouble and
// PPCDoubleDouble each go to their dedicated converter; any other semantics
// are asserted to be x87DoubleExtended and converted as such.
// NOTE(review): the return type, braces and the rest of the assert
// expression (line 2783) are missing from this view.
2771 APFloat::bitcastToAPInt() const
2773 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2774 return convertFloatAPFloatToAPInt();
2776 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2777 return convertDoubleAPFloatToAPInt();
2779 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2780 return convertPPCDoubleDoubleAPFloatToAPInt();
2782 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2784 return convertF80LongDoubleAPFloatToAPInt();
// Returns this value as a host float by taking its bit pattern
// (bitcastToAPInt) and reinterpreting it with APInt::bitsToFloat.
// Asserts IEEEsingle semantics; no format conversion or rounding is done
// here.
// NOTE(review): the return type and braces are missing from this view.
2788 APFloat::convertToFloat() const
2790 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && "Float semantics are not IEEEsingle");
2791 APInt api = bitcastToAPInt();
2792 return api.bitsToFloat();
// Returns this value as a host double by taking its bit pattern
// (bitcastToAPInt) and reinterpreting it with APInt::bitsToDouble.
// Asserts IEEEdouble semantics; no format conversion or rounding is done
// here.
// NOTE(review): the return type and braces are missing from this view.
2796 APFloat::convertToDouble() const
2798 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && "Float semantics are not IEEEdouble");
2799 APInt api = bitcastToAPInt();
2800 return api.bitsToDouble();
2803 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2804 /// does not support these bit patterns:
2805 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2806 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2807 /// exponent = 0, integer bit 1 ("pseudodenormal")
2808 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2809 /// At the moment, the first two are treated as NaNs, the second two as Normal.
///
/// Initializes this object with x87DoubleExtended semantics from the 80-bit
/// pattern in \p api: raw word 0 is the 64-bit significand, the low 15 bits
/// of raw word 1 are the biased exponent, and the sign is taken from raw
/// word 1 shifted right by 15. Asserts that \p api is 80 bits wide.
/// NOTE(review): the return type, braces, the zero and NaN category
/// assignments (lines 2825, 2831) and the denormal exponent adjustment
/// (after line 2839) are missing from this view.
2811 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2813 assert(api.getBitWidth()==80);
2814 uint64_t i1 = api.getRawData()[0];
2815 uint64_t i2 = api.getRawData()[1];
2816 uint64_t myexponent = (i2 & 0x7fff);
2817 uint64_t mysignificand = i1;
2819 initialize(&APFloat::x87DoubleExtended);
2820 assert(partCount()==2);
2822 sign = static_cast<unsigned int>(i2>>15);
// All-zero exponent and significand; presumably classified as zero (the
// assignment is not visible here).
2823 if (myexponent==0 && mysignificand==0) {
2824 // exponent, significand meaningless
// All-ones exponent with only the integer bit set: infinity.
2826 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2827 // exponent, significand meaningless
2828 category = fcInfinity;
// All-ones exponent with any other significand; presumably classified as
// NaN (the assignment is not visible here). The payload goes in part 0 and
// part 1 is cleared.
2829 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2830 // exponent meaningless
2832 significandParts()[0] = mysignificand;
2833 significandParts()[1] = 0;
// Everything else is treated as a normal number: remove the bias and store
// the significand unchanged (the integer bit is already explicit).
2835 category = fcNormal;
2836 exponent = myexponent - 16383;
2837 significandParts()[0] = mysignificand;
2838 significandParts()[1] = 0;
// NOTE(review): the statement controlled by this test is not visible;
// presumably it adjusts the exponent for denormals, per the file's comment
// that denormals use minExponent in APFloat.
2839 if (myexponent==0) // denormal
// Initializes this object with PPCDoubleDouble semantics from the 128-bit
// pattern in `api`. Each raw 64-bit word is split the same way: sign in bit
// 63, 11-bit exponent field in bits 52-62, 52-bit fraction in bits 0-51.
// Word 0 feeds sign/exponent/significand part 0, word 1 feeds
// sign2/exponent2/significand part 1. Classification uses word 0 only.
// Asserts that `api` is 128 bits wide.
// NOTE(review): the return type, braces, the zero and NaN category
// assignments (lines 2863, 2870-2871) and the statements around the two
// integer-bit stores (lines 2884-2885, 2887-2889) are missing from this view.
2845 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2847 assert(api.getBitWidth()==128);
2848 uint64_t i1 = api.getRawData()[0];
2849 uint64_t i2 = api.getRawData()[1];
2850 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2851 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2852 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2853 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2855 initialize(&APFloat::PPCDoubleDouble);
2856 assert(partCount()==2);
2858 sign = static_cast<unsigned int>(i1>>63);
2859 sign2 = static_cast<unsigned int>(i2>>63);
// First word all-zero exponent and fraction; presumably classified as zero
// (the assignment is not visible here).
2860 if (myexponent==0 && mysignificand==0) {
2861 // exponent, significand meaningless
2862 // exponent2 and significand2 are required to be 0; we don't check
// First word all-ones exponent with zero fraction: infinity.
2864 } else if (myexponent==0x7ff && mysignificand==0) {
2865 // exponent, significand meaningless
2866 // exponent2 and significand2 are required to be 0; we don't check
2867 category = fcInfinity;
// First word all-ones exponent with nonzero fraction; presumably classified
// as NaN (the assignment is not visible here). The second word's exponent
// field is kept as-is, with no bias removed.
2868 } else if (myexponent==0x7ff && mysignificand!=0) {
2869 // exponent meaningless. So is the whole second word, but keep it
2872 exponent2 = myexponent2;
2873 significandParts()[0] = mysignificand;
2874 significandParts()[1] = mysignificand2;
2876 category = fcNormal;
2877 // Note there is no category2; the second word is treated as if it is
2878 // fcNormal, although it might be something else considered by itself.
// Remove the bias from both exponent fields and store the fraction bits.
2879 exponent = myexponent - 1023;
2880 exponent2 = myexponent2 - 1023;
2881 significandParts()[0] = mysignificand;
2882 significandParts()[1] = mysignificand2;
// NOTE(review): the statements controlled by this test and the conditions
// guarding the two stores below are not visible; presumably denormal halves
// get an exponent adjustment and the others get the implicit integer bit
// (bit 52) set.
2883 if (myexponent==0) // denormal
2886 significandParts()[0] |= 0x10000000000000LL; // integer bit
2890 significandParts()[1] |= 0x10000000000000LL; // integer bit
// Initializes this object with IEEEdouble semantics from the 64-bit pattern
// in `api`: sign from bit 63, 11-bit exponent field from bits 52-62, 52-bit
// fraction from bits 0-51. Asserts that `api` is 64 bits wide.
// NOTE(review): the return type, braces, the zero and NaN category
// assignments (lines 2908, 2914) and the statements around the integer-bit
// store (lines 2921-2922) are missing from this view.
2895 APFloat::initFromDoubleAPInt(const APInt &api)
2897 assert(api.getBitWidth()==64);
2898 uint64_t i = *api.getRawData();
2899 uint64_t myexponent = (i >> 52) & 0x7ff;
2900 uint64_t mysignificand = i & 0xfffffffffffffLL;
2902 initialize(&APFloat::IEEEdouble);
2903 assert(partCount()==1);
2905 sign = static_cast<unsigned int>(i>>63);
// All-zero exponent and fraction; presumably classified as zero (the
// assignment is not visible here).
2906 if (myexponent==0 && mysignificand==0) {
2907 // exponent, significand meaningless
// All-ones exponent with zero fraction: infinity.
2909 } else if (myexponent==0x7ff && mysignificand==0) {
2910 // exponent, significand meaningless
2911 category = fcInfinity;
// All-ones exponent with nonzero fraction; presumably classified as NaN (the
// assignment is not visible here). The fraction is stored as the payload.
2912 } else if (myexponent==0x7ff && mysignificand!=0) {
2913 // exponent meaningless
2915 *significandParts() = mysignificand;
// Everything else is a finite nonzero number: remove the bias and store the
// fraction bits.
2917 category = fcNormal;
2918 exponent = myexponent - 1023;
2919 *significandParts() = mysignificand;
// NOTE(review): the statement controlled by this test and the condition
// guarding the store below are not visible; presumably denormals get an
// exponent adjustment and all other values get the implicit integer bit
// (bit 52) set.
2920 if (myexponent==0) // denormal
2923 *significandParts() |= 0x10000000000000LL; // integer bit
// Initializes this object with IEEEsingle semantics from the 32-bit pattern
// in `api`: 8-bit exponent field from bits 23-30 and 23-bit fraction from
// bits 0-22 of the low 32 bits of the raw word. Asserts that `api` is 32
// bits wide.
// NOTE(review): the return type, braces, the sign assignment (line 2938),
// the zero and NaN category assignments (lines 2941, 2947) and the
// statements around the integer-bit store (lines 2954-2955) are missing
// from this view.
2928 APFloat::initFromFloatAPInt(const APInt & api)
2930 assert(api.getBitWidth()==32);
2931 uint32_t i = (uint32_t)*api.getRawData();
2932 uint32_t myexponent = (i >> 23) & 0xff;
2933 uint32_t mysignificand = i & 0x7fffff;
2935 initialize(&APFloat::IEEEsingle);
2936 assert(partCount()==1);
// All-zero exponent and fraction; presumably classified as zero (the
// assignment is not visible here).
2939 if (myexponent==0 && mysignificand==0) {
2940 // exponent, significand meaningless
// All-ones exponent with zero fraction: infinity.
2942 } else if (myexponent==0xff && mysignificand==0) {
2943 // exponent, significand meaningless
2944 category = fcInfinity;
// All-ones exponent with nonzero fraction; presumably classified as NaN (the
// assignment is not visible here). The fraction is stored as the payload.
2945 } else if (myexponent==0xff && mysignificand!=0) {
2946 // sign, exponent, significand meaningless
2948 *significandParts() = mysignificand;
// Everything else is a finite nonzero number: remove the bias and store the
// fraction bits.
2950 category = fcNormal;
2951 exponent = myexponent - 127; //bias
2952 *significandParts() = mysignificand;
// NOTE(review): the statement controlled by this test and the condition
// guarding the store below are not visible; presumably denormals get an
// exponent adjustment and all other values get the implicit integer bit
// (bit 23) set.
2953 if (myexponent==0) // denormal
2956 *significandParts() |= 0x800000; // integer bit
2960 /// Treat api as containing the bits of a floating point number. Currently
2961 /// we infer the floating point type from the size of the APInt. The
2962 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2963 /// when the size is anything else).
///
/// Width dispatch: 32 -> single, 64 -> double, 80 -> x87 extended,
/// 128 with \p isIEEE false -> PPC double-double. Every handled case
/// returns, so any other width -- including 128 bits with \p isIEEE true --
/// reaches llvm_unreachable.
/// NOTE(review): the return type, braces and line 2975 are missing from
/// this view.
2965 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2967 if (api.getBitWidth() == 32)
2968 return initFromFloatAPInt(api);
2969 else if (api.getBitWidth()==64)
2970 return initFromDoubleAPInt(api);
2971 else if (api.getBitWidth()==80)
2972 return initFromF80LongDoubleAPInt(api);
2973 else if (api.getBitWidth()==128 && !isIEEE)
2974 return initFromPPCDoubleDoubleAPInt(api);
2976 llvm_unreachable(0);
// Constructs an APFloat from a raw bit pattern by forwarding both arguments
// to initFromAPInt, which picks the format from the width of `api` (and from
// `isIEEE` for 128-bit inputs).
// NOTE(review): the braces are missing from this view.
2979 APFloat::APFloat(const APInt& api, bool isIEEE)
2981 initFromAPInt(api, isIEEE);
// Constructs an APFloat from a host float: a 32-bit APInt receives the
// float's bits via floatToBits and is handed to initFromAPInt, whose width
// dispatch sends 32-bit inputs to the IEEE single path.
// NOTE(review): initFromAPInt is called with one argument here, so isIEEE
// presumably has a default in its declaration -- confirm in the header. The
// braces are missing from this view.
2984 APFloat::APFloat(float f)
2986 APInt api = APInt(32, 0);
2987 initFromAPInt(api.floatToBits(f));
// Constructs an APFloat from a host double: a 64-bit APInt receives the
// double's bits via doubleToBits and is handed to initFromAPInt, whose width
// dispatch sends 64-bit inputs to the IEEE double path.
// NOTE(review): initFromAPInt is called with one argument here, so isIEEE
// presumably has a default in its declaration -- confirm in the header. The
// braces are missing from this view.
2990 APFloat::APFloat(double d)
2992 APInt api = APInt(64, 0);
2993 initFromAPInt(api.doubleToBits(d));