1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/MathExtras.h"
24 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
26 /* Assumed in hexadecimal significand parsing, and conversion to
27 hexadecimal strings. */
28 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
29 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
33 /* Represents floating point arithmetic semantics. */
35 /* The largest E such that 2^E is representable; this matches the
36 definition of IEEE 754. */
37 exponent_t maxExponent;
39 /* The smallest E such that 2^E is a normalized number; this
40 matches the definition of IEEE 754. */
41 exponent_t minExponent;
43 /* Number of bits in the significand. This includes the integer
45 unsigned int precision;
47 /* True if arithmetic is supported. */
48 unsigned int arithmeticOK;
51 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
52 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
53 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
54 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
55 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
57 // The PowerPC format consists of two doubles. It does not map cleanly
58 // onto the usual format above. For now only storage of constants of
59 // this type is supported, no arithmetic.
60 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
62 /* A tight upper bound on number of parts required to hold the value
65 power * 815 / (351 * integerPartWidth) + 1
67 However, whilst the result may require only this many parts,
68 because we are multiplying two values to get it, the
69 multiplication may require an extra part with the excess part
70 being zero (consider the trivial case of 1 * 1, tcFullMultiply
71 requires two parts to hold the single-part result). So we add an
72 extra one to guarantee enough space whilst multiplying. */
73 const unsigned int maxExponent = 16383;
74 const unsigned int maxPrecision = 113;
75 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
76 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
77 / (351 * integerPartWidth));
80 /* A bunch of private, handy routines. */
82 static inline unsigned int
83 partCountForBits(unsigned int bits)
85 return ((bits) + integerPartWidth - 1) / integerPartWidth;
88 /* Returns 0U-9U. Return values >= 10U are not digits. */
89 static inline unsigned int
90 decDigitValue(unsigned int c)
96 hexDigitValue(unsigned int c)
116 assertArithmeticOK(const llvm::fltSemantics &semantics) {
117 assert(semantics.arithmeticOK
118 && "Compile-time arithmetic does not support these semantics");
121 /* Return the value of a decimal exponent of the form
124 If the exponent overflows, returns a large exponent with the
127 readExponent(StringRef::iterator begin, StringRef::iterator end)
130 unsigned int absExponent;
131 const unsigned int overlargeExponent = 24000; /* FIXME. */
132 StringRef::iterator p = begin;
134 assert(p != end && "Exponent has no digits");
136 isNegative = (*p == '-');
137 if (*p == '-' || *p == '+') {
139 assert(p != end && "Exponent has no digits");
142 absExponent = decDigitValue(*p++);
143 assert(absExponent < 10U && "Invalid character in exponent");
145 for (; p != end; ++p) {
148 value = decDigitValue(*p);
149 assert(value < 10U && "Invalid character in exponent");
151 value += absExponent * 10;
152 if (absExponent >= overlargeExponent) {
153 absExponent = overlargeExponent;
159 assert(p == end && "Invalid exponent in exponent");
162 return -(int) absExponent;
164 return (int) absExponent;
167 /* This is ugly and needs cleaning up, but I don't immediately see
168 how whilst remaining safe. */
170 totalExponent(StringRef::iterator p, StringRef::iterator end,
171 int exponentAdjustment)
173 int unsignedExponent;
174 bool negative, overflow;
177 /* Move past the exponent letter and sign to the digits. */
179 negative = *p == '-';
180 if(*p == '-' || *p == '+')
183 unsignedExponent = 0;
185 for(; p != end; ++p) {
188 value = decDigitValue(*p);
189 assert(value < 10U && "Invalid character in exponent");
191 unsignedExponent = unsignedExponent * 10 + value;
192 if(unsignedExponent > 65535)
196 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
200 exponent = unsignedExponent;
202 exponent = -exponent;
203 exponent += exponentAdjustment;
204 if(exponent > 65535 || exponent < -65536)
209 exponent = negative ? -65536: 65535;
214 static StringRef::iterator
215 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
216 StringRef::iterator *dot)
218 StringRef::iterator p = begin;
220 while(*p == '0' && p != end)
226 assert(end - begin != 1 && "String cannot be just a dot");
228 while(*p == '0' && p != end)
235 /* Given a normal decimal floating point number of the form
239 where the decimal point and exponent are optional, fill out the
240 structure D. Exponent is appropriate if the significand is
241 treated as an integer, and normalizedExponent if the significand
242 is taken to have the decimal point after a single leading
245 If the value is zero, D->firstSigDigit points to a non-digit, and
246 the return exponent is zero.
249 const char *firstSigDigit;
250 const char *lastSigDigit;
252 int normalizedExponent;
256 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
259 StringRef::iterator dot = end;
260 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
262 D->firstSigDigit = p;
264 D->normalizedExponent = 0;
266 for (; p != end; ++p) {
268 assert(dot == end && "Multiple dots in float");
273 if (decDigitValue(*p) >= 10U)
278 assert((*p == 'e' || *p == 'E') && "Invalid character in digit string");
280 /* p points to the first non-digit in the string */
281 if (*p == 'e' || *p == 'E') {
282 D->exponent = readExponent(p + 1, end);
285 /* Implied decimal point? */
290 /* If number is all zeroes accept any exponent. */
291 if (p != D->firstSigDigit) {
292 /* Drop insignificant trailing zeroes. */
297 while (p != begin && *p == '0');
298 while (p != begin && *p == '.');
301 /* Adjust the exponents for any decimal point. */
302 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
303 D->normalizedExponent = (D->exponent +
304 static_cast<exponent_t>((p - D->firstSigDigit)
305 - (dot > D->firstSigDigit && dot < p)));
311 /* Return the trailing fraction of a hexadecimal number.
312 DIGITVALUE is the first hex digit of the fraction, P points to
315 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
316 unsigned int digitValue)
318 unsigned int hexDigit;
320 /* If the first trailing digit isn't 0 or 8 we can work out the
321 fraction immediately. */
323 return lfMoreThanHalf;
324 else if(digitValue < 8 && digitValue > 0)
325 return lfLessThanHalf;
327 /* Otherwise we need to find the first non-zero digit. */
331 assert(p != end && "Invalid trailing hexadecimal fraction!");
333 hexDigit = hexDigitValue(*p);
335 /* If we ran off the end it is exactly zero or one-half, otherwise
338 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
340 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
343 /* Return the fraction lost were a bignum truncated losing the least
344 significant BITS bits. */
346 lostFractionThroughTruncation(const integerPart *parts,
347 unsigned int partCount,
352 lsb = APInt::tcLSB(parts, partCount);
354 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
356 return lfExactlyZero;
358 return lfExactlyHalf;
359 if(bits <= partCount * integerPartWidth
360 && APInt::tcExtractBit(parts, bits - 1))
361 return lfMoreThanHalf;
363 return lfLessThanHalf;
366 /* Shift DST right BITS bits noting lost fraction. */
368 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
370 lostFraction lost_fraction;
372 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
374 APInt::tcShiftRight(dst, parts, bits);
376 return lost_fraction;
379 /* Combine the effect of two lost fractions. */
381 combineLostFractions(lostFraction moreSignificant,
382 lostFraction lessSignificant)
384 if(lessSignificant != lfExactlyZero) {
385 if(moreSignificant == lfExactlyZero)
386 moreSignificant = lfLessThanHalf;
387 else if(moreSignificant == lfExactlyHalf)
388 moreSignificant = lfMoreThanHalf;
391 return moreSignificant;
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUE1 and HUE2 half-ulps, is strictly less
   than the returned value.

   INEXACTMULTIPLY is true if the multiplication itself had to round.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inexact = inexactMultiply ? 1U : 0U;
  const unsigned int inputError = HUerr1 + HUerr2;

  /* Exact inputs: the only error is that of the multiply itself.  */
  if (inputError == 0)
    return inexact * 2;  /* <= inexactMultiply half-ulps.  */

  return inexact + 2 * inputError;
}
412 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
413 when the least significant BITS are truncated. BITS cannot be
416 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
418 unsigned int count, partBits;
419 integerPart part, boundary;
424 count = bits / integerPartWidth;
425 partBits = bits % integerPartWidth + 1;
427 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
430 boundary = (integerPart) 1 << (partBits - 1);
435 if (part - boundary <= boundary - part)
436 return part - boundary;
438 return boundary - part;
441 if (part == boundary) {
444 return ~(integerPart) 0; /* A lot. */
447 } else if (part == boundary - 1) {
450 return ~(integerPart) 0; /* A lot. */
455 return ~(integerPart) 0; /* A lot. */
458 /* Place pow(5, power) in DST, and return the number of parts used.
459 DST must be at least one part larger than size of the answer. */
461 powerOf5(integerPart *dst, unsigned int power)
463 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
465 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
466 pow5s[0] = 78125 * 5;
468 unsigned int partsCount[16] = { 1 };
469 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
471 assert(power <= maxExponent);
476 *p1 = firstEightPowers[power & 7];
482 for (unsigned int n = 0; power; power >>= 1, n++) {
487 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
489 pc = partsCount[n - 1];
490 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
492 if (pow5[pc - 1] == 0)
500 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
502 if (p2[result - 1] == 0)
505 /* Now result is in p1 with partsCount parts and p2 is scratch
507 tmp = p1, p1 = p2, p2 = tmp;
514 APInt::tcAssign(dst, p1, result);
519 /* Zero at the end to avoid modular arithmetic when adding one; used
520 when rounding up during hexadecimal output. */
521 static const char hexDigitsLower[] = "0123456789abcdef0";
522 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
523 static const char infinityL[] = "infinity";
524 static const char infinityU[] = "INFINITY";
525 static const char NaNL[] = "nan";
526 static const char NaNU[] = "NAN";
528 /* Write out an integerPart in hexadecimal, starting with the most
529 significant nibble. Write out exactly COUNT hexdigits, return
532 partAsHex (char *dst, integerPart part, unsigned int count,
533 const char *hexDigitChars)
535 unsigned int result = count;
537 assert (count != 0 && count <= integerPartWidth / 4);
539 part >>= (integerPartWidth - 4 * count);
541 dst[count] = hexDigitChars[part & 0xf];
548 /* Write out an unsigned decimal integer. */
550 writeUnsignedDecimal (char *dst, unsigned int n)
566 /* Write out a signed decimal integer. */
568 writeSignedDecimal (char *dst, int value)
572 dst = writeUnsignedDecimal(dst, -(unsigned) value);
574 dst = writeUnsignedDecimal(dst, value);
581 APFloat::initialize(const fltSemantics *ourSemantics)
585 semantics = ourSemantics;
588 significand.parts = new integerPart[count];
592 APFloat::freeSignificand()
595 delete [] significand.parts;
599 APFloat::assign(const APFloat &rhs)
601 assert(semantics == rhs.semantics);
604 category = rhs.category;
605 exponent = rhs.exponent;
607 exponent2 = rhs.exponent2;
608 if(category == fcNormal || category == fcNaN)
609 copySignificand(rhs);
613 APFloat::copySignificand(const APFloat &rhs)
615 assert(category == fcNormal || category == fcNaN);
616 assert(rhs.partCount() >= partCount());
618 APInt::tcAssign(significandParts(), rhs.significandParts(),
622 /* Make this number a NaN, with an arbitrary but deterministic value
623 for the significand. If double or longer, this is a signalling NaN,
624 which may not be ideal. If float, this is QNaN(0). */
626 APFloat::makeNaN(unsigned type)
629 // FIXME: Add double and long double support for QNaN(0).
630 if (semantics->precision == 24 && semantics->maxExponent == 127) {
632 type &= ~0x80000000U;
635 APInt::tcSet(significandParts(), type, partCount());
639 APFloat::operator=(const APFloat &rhs)
642 if(semantics != rhs.semantics) {
644 initialize(rhs.semantics);
653 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
656 if (semantics != rhs.semantics ||
657 category != rhs.category ||
660 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
663 if (category==fcZero || category==fcInfinity)
665 else if (category==fcNormal && exponent!=rhs.exponent)
667 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
668 exponent2!=rhs.exponent2)
672 const integerPart* p=significandParts();
673 const integerPart* q=rhs.significandParts();
674 for (; i>0; i--, p++, q++) {
682 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
684 assertArithmeticOK(ourSemantics);
685 initialize(&ourSemantics);
688 exponent = ourSemantics.precision - 1;
689 significandParts()[0] = value;
690 normalize(rmNearestTiesToEven, lfExactlyZero);
693 APFloat::APFloat(const fltSemantics &ourSemantics,
694 fltCategory ourCategory, bool negative, unsigned type)
696 assertArithmeticOK(ourSemantics);
697 initialize(&ourSemantics);
698 category = ourCategory;
700 if (category == fcNormal)
702 else if (ourCategory == fcNaN)
706 APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
708 assertArithmeticOK(ourSemantics);
709 initialize(&ourSemantics);
710 convertFromString(text, rmNearestTiesToEven);
713 APFloat::APFloat(const APFloat &rhs)
715 initialize(rhs.semantics);
724 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
725 void APFloat::Profile(FoldingSetNodeID& ID) const {
726 ID.Add(bitcastToAPInt());
730 APFloat::partCount() const
732 return partCountForBits(semantics->precision + 1);
736 APFloat::semanticsPrecision(const fltSemantics &semantics)
738 return semantics.precision;
742 APFloat::significandParts() const
744 return const_cast<APFloat *>(this)->significandParts();
748 APFloat::significandParts()
750 assert(category == fcNormal || category == fcNaN);
753 return significand.parts;
755 return &significand.part;
759 APFloat::zeroSignificand()
762 APInt::tcSet(significandParts(), 0, partCount());
765 /* Increment an fcNormal floating point number's significand. */
767 APFloat::incrementSignificand()
771 carry = APInt::tcIncrement(significandParts(), partCount());
773 /* Our callers should never cause us to overflow. */
777 /* Add the significand of the RHS. Returns the carry flag. */
779 APFloat::addSignificand(const APFloat &rhs)
783 parts = significandParts();
785 assert(semantics == rhs.semantics);
786 assert(exponent == rhs.exponent);
788 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
791 /* Subtract the significand of the RHS with a borrow flag. Returns
794 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
798 parts = significandParts();
800 assert(semantics == rhs.semantics);
801 assert(exponent == rhs.exponent);
803 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
807 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
808 on to the full-precision result of the multiplication. Returns the
811 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
813 unsigned int omsb; // One, not zero, based MSB.
814 unsigned int partsCount, newPartsCount, precision;
815 integerPart *lhsSignificand;
816 integerPart scratch[4];
817 integerPart *fullSignificand;
818 lostFraction lost_fraction;
821 assert(semantics == rhs.semantics);
823 precision = semantics->precision;
824 newPartsCount = partCountForBits(precision * 2);
826 if(newPartsCount > 4)
827 fullSignificand = new integerPart[newPartsCount];
829 fullSignificand = scratch;
831 lhsSignificand = significandParts();
832 partsCount = partCount();
834 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
835 rhs.significandParts(), partsCount, partsCount);
837 lost_fraction = lfExactlyZero;
838 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
839 exponent += rhs.exponent;
842 Significand savedSignificand = significand;
843 const fltSemantics *savedSemantics = semantics;
844 fltSemantics extendedSemantics;
846 unsigned int extendedPrecision;
848 /* Normalize our MSB. */
849 extendedPrecision = precision + precision - 1;
850 if(omsb != extendedPrecision)
852 APInt::tcShiftLeft(fullSignificand, newPartsCount,
853 extendedPrecision - omsb);
854 exponent -= extendedPrecision - omsb;
857 /* Create new semantics. */
858 extendedSemantics = *semantics;
859 extendedSemantics.precision = extendedPrecision;
861 if(newPartsCount == 1)
862 significand.part = fullSignificand[0];
864 significand.parts = fullSignificand;
865 semantics = &extendedSemantics;
867 APFloat extendedAddend(*addend);
868 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
869 assert(status == opOK);
870 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
872 /* Restore our state. */
873 if(newPartsCount == 1)
874 fullSignificand[0] = significand.part;
875 significand = savedSignificand;
876 semantics = savedSemantics;
878 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
881 exponent -= (precision - 1);
883 if(omsb > precision) {
884 unsigned int bits, significantParts;
887 bits = omsb - precision;
888 significantParts = partCountForBits(omsb);
889 lf = shiftRight(fullSignificand, significantParts, bits);
890 lost_fraction = combineLostFractions(lf, lost_fraction);
894 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
896 if(newPartsCount > 4)
897 delete [] fullSignificand;
899 return lost_fraction;
902 /* Divide our significand by that of RHS, leaving the quotient in our significand.  Returns the lost fraction. */
904 APFloat::divideSignificand(const APFloat &rhs)
906 unsigned int bit, i, partsCount;
907 const integerPart *rhsSignificand;
908 integerPart *lhsSignificand, *dividend, *divisor;
909 integerPart scratch[4];
910 lostFraction lost_fraction;
912 assert(semantics == rhs.semantics);
914 lhsSignificand = significandParts();
915 rhsSignificand = rhs.significandParts();
916 partsCount = partCount();
919 dividend = new integerPart[partsCount * 2];
923 divisor = dividend + partsCount;
925 /* Copy the dividend and divisor as they will be modified in-place. */
926 for(i = 0; i < partsCount; i++) {
927 dividend[i] = lhsSignificand[i];
928 divisor[i] = rhsSignificand[i];
929 lhsSignificand[i] = 0;
932 exponent -= rhs.exponent;
934 unsigned int precision = semantics->precision;
936 /* Normalize the divisor. */
937 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
940 APInt::tcShiftLeft(divisor, partsCount, bit);
943 /* Normalize the dividend. */
944 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
947 APInt::tcShiftLeft(dividend, partsCount, bit);
950 /* Ensure the dividend >= divisor initially for the loop below.
951 Incidentally, this means that the division loop below is
952 guaranteed to set the integer bit to one. */
953 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
955 APInt::tcShiftLeft(dividend, partsCount, 1);
956 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
960 for(bit = precision; bit; bit -= 1) {
961 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
962 APInt::tcSubtract(dividend, divisor, 0, partsCount);
963 APInt::tcSetBit(lhsSignificand, bit - 1);
966 APInt::tcShiftLeft(dividend, partsCount, 1);
969 /* Figure out the lost fraction. */
970 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
973 lost_fraction = lfMoreThanHalf;
975 lost_fraction = lfExactlyHalf;
976 else if(APInt::tcIsZero(dividend, partsCount))
977 lost_fraction = lfExactlyZero;
979 lost_fraction = lfLessThanHalf;
984 return lost_fraction;
988 APFloat::significandMSB() const
990 return APInt::tcMSB(significandParts(), partCount());
994 APFloat::significandLSB() const
996 return APInt::tcLSB(significandParts(), partCount());
999 /* Note that a zero result is NOT normalized to fcZero. */
1001 APFloat::shiftSignificandRight(unsigned int bits)
1003 /* Our exponent should not overflow. */
1004 assert((exponent_t) (exponent + bits) >= exponent);
1008 return shiftRight(significandParts(), partCount(), bits);
1011 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1013 APFloat::shiftSignificandLeft(unsigned int bits)
1015 assert(bits < semantics->precision);
1018 unsigned int partsCount = partCount();
1020 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1023 assert(!APInt::tcIsZero(significandParts(), partsCount));
1028 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1032 assert(semantics == rhs.semantics);
1033 assert(category == fcNormal);
1034 assert(rhs.category == fcNormal);
1036 compare = exponent - rhs.exponent;
1038 /* If exponents are equal, do an unsigned bignum comparison of the
1041 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1045 return cmpGreaterThan;
1046 else if(compare < 0)
1052 /* Handle overflow. Sign is preserved. We either become infinity or
1053 the largest finite number. */
1055 APFloat::handleOverflow(roundingMode rounding_mode)
1058 if(rounding_mode == rmNearestTiesToEven
1059 || rounding_mode == rmNearestTiesToAway
1060 || (rounding_mode == rmTowardPositive && !sign)
1061 || (rounding_mode == rmTowardNegative && sign))
1063 category = fcInfinity;
1064 return (opStatus) (opOverflow | opInexact);
1067 /* Otherwise we become the largest finite number. */
1068 category = fcNormal;
1069 exponent = semantics->maxExponent;
1070 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1071 semantics->precision);
1076 /* Returns TRUE if, when truncating the current number, with BIT the
1077 new LSB, with the given lost fraction and rounding mode, the result
1078 would need to be rounded away from zero (i.e., by increasing the
1079 significand). This routine must work for fcZero of both signs, and
1080 fcNormal numbers. */
1082 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1083 lostFraction lost_fraction,
1084 unsigned int bit) const
1086 /* NaNs and infinities should not have lost fractions. */
1087 assert(category == fcNormal || category == fcZero);
1089 /* Current callers never pass this so we don't handle it. */
1090 assert(lost_fraction != lfExactlyZero);
1092 switch (rounding_mode) {
1094 llvm_unreachable(0);
1096 case rmNearestTiesToAway:
1097 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1099 case rmNearestTiesToEven:
1100 if(lost_fraction == lfMoreThanHalf)
1103 /* Our zeroes don't have a significand to test. */
1104 if(lost_fraction == lfExactlyHalf && category != fcZero)
1105 return APInt::tcExtractBit(significandParts(), bit);
1112 case rmTowardPositive:
1113 return sign == false;
1115 case rmTowardNegative:
1116 return sign == true;
1121 APFloat::normalize(roundingMode rounding_mode,
1122 lostFraction lost_fraction)
1124 unsigned int omsb; /* One, not zero, based MSB. */
1127 if(category != fcNormal)
1130 /* Before rounding normalize the exponent of fcNormal numbers. */
1131 omsb = significandMSB() + 1;
1134 /* OMSB is numbered from 1. We want to place it in the integer
1135 bit numbered PRECISION if possible, with a compensating change in
1137 exponentChange = omsb - semantics->precision;
1139 /* If the resulting exponent is too high, overflow according to
1140 the rounding mode. */
1141 if(exponent + exponentChange > semantics->maxExponent)
1142 return handleOverflow(rounding_mode);
1144 /* Subnormal numbers have exponent minExponent, and their MSB
1145 is forced based on that. */
1146 if(exponent + exponentChange < semantics->minExponent)
1147 exponentChange = semantics->minExponent - exponent;
1149 /* Shifting left is easy as we don't lose precision. */
1150 if(exponentChange < 0) {
1151 assert(lost_fraction == lfExactlyZero);
1153 shiftSignificandLeft(-exponentChange);
1158 if(exponentChange > 0) {
1161 /* Shift right and capture any new lost fraction. */
1162 lf = shiftSignificandRight(exponentChange);
1164 lost_fraction = combineLostFractions(lf, lost_fraction);
1166 /* Keep OMSB up-to-date. */
1167 if(omsb > (unsigned) exponentChange)
1168 omsb -= exponentChange;
1174 /* Now round the number according to rounding_mode given the lost
1177 /* As specified in IEEE 754, since we do not trap we do not report
1178 underflow for exact results. */
1179 if(lost_fraction == lfExactlyZero) {
1180 /* Canonicalize zeroes. */
1187 /* Increment the significand if we're rounding away from zero. */
1188 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1190 exponent = semantics->minExponent;
1192 incrementSignificand();
1193 omsb = significandMSB() + 1;
1195 /* Did the significand increment overflow? */
1196 if(omsb == (unsigned) semantics->precision + 1) {
1197 /* Renormalize by incrementing the exponent and shifting our
1198 significand right one. However if we already have the
1199 maximum exponent we overflow to infinity. */
1200 if(exponent == semantics->maxExponent) {
1201 category = fcInfinity;
1203 return (opStatus) (opOverflow | opInexact);
1206 shiftSignificandRight(1);
1212 /* The normal case - we were and are not denormal, and any
1213 significand increment above didn't overflow. */
1214 if(omsb == semantics->precision)
1217 /* We have a non-zero denormal. */
1218 assert(omsb < semantics->precision);
1220 /* Canonicalize zeroes. */
1224 /* The fcZero case is a denormal that underflowed to zero. */
1225 return (opStatus) (opUnderflow | opInexact);
1229 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1231 switch (convolve(category, rhs.category)) {
1233 llvm_unreachable(0);
1235 case convolve(fcNaN, fcZero):
1236 case convolve(fcNaN, fcNormal):
1237 case convolve(fcNaN, fcInfinity):
1238 case convolve(fcNaN, fcNaN):
1239 case convolve(fcNormal, fcZero):
1240 case convolve(fcInfinity, fcNormal):
1241 case convolve(fcInfinity, fcZero):
1244 case convolve(fcZero, fcNaN):
1245 case convolve(fcNormal, fcNaN):
1246 case convolve(fcInfinity, fcNaN):
1248 copySignificand(rhs);
1251 case convolve(fcNormal, fcInfinity):
1252 case convolve(fcZero, fcInfinity):
1253 category = fcInfinity;
1254 sign = rhs.sign ^ subtract;
1257 case convolve(fcZero, fcNormal):
1259 sign = rhs.sign ^ subtract;
1262 case convolve(fcZero, fcZero):
1263 /* Sign depends on rounding mode; handled by caller. */
1266 case convolve(fcInfinity, fcInfinity):
1267 /* Differently signed infinities can only be validly
1269 if(((sign ^ rhs.sign)!=0) != subtract) {
1276 case convolve(fcNormal, fcNormal):
1281 /* Add or subtract two normal numbers. */
1283 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1286 lostFraction lost_fraction;
1289 /* Determine if the operation on the absolute values is effectively
1290 an addition or subtraction. */
1291 subtract ^= (sign ^ rhs.sign) ? true : false;
1293 /* Are we bigger exponent-wise than the RHS? */
1294 bits = exponent - rhs.exponent;
1296 /* Subtraction is more subtle than one might naively expect. */
1298 APFloat temp_rhs(rhs);
1302 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1303 lost_fraction = lfExactlyZero;
1304 } else if (bits > 0) {
1305 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1306 shiftSignificandLeft(1);
1309 lost_fraction = shiftSignificandRight(-bits - 1);
1310 temp_rhs.shiftSignificandLeft(1);
1315 carry = temp_rhs.subtractSignificand
1316 (*this, lost_fraction != lfExactlyZero);
1317 copySignificand(temp_rhs);
1320 carry = subtractSignificand
1321 (temp_rhs, lost_fraction != lfExactlyZero);
1324 /* Invert the lost fraction - it was on the RHS and
1326 if(lost_fraction == lfLessThanHalf)
1327 lost_fraction = lfMoreThanHalf;
1328 else if(lost_fraction == lfMoreThanHalf)
1329 lost_fraction = lfLessThanHalf;
1331 /* The code above is intended to ensure that no borrow is
1336 APFloat temp_rhs(rhs);
1338 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1339 carry = addSignificand(temp_rhs);
1341 lost_fraction = shiftSignificandRight(-bits);
1342 carry = addSignificand(rhs);
1345 /* We have a guard bit; generating a carry cannot happen. */
1349 return lost_fraction;
1353 APFloat::multiplySpecials(const APFloat &rhs)
1355 switch (convolve(category, rhs.category)) {
1357 llvm_unreachable(0);
1359 case convolve(fcNaN, fcZero):
1360 case convolve(fcNaN, fcNormal):
1361 case convolve(fcNaN, fcInfinity):
1362 case convolve(fcNaN, fcNaN):
1365 case convolve(fcZero, fcNaN):
1366 case convolve(fcNormal, fcNaN):
1367 case convolve(fcInfinity, fcNaN):
1369 copySignificand(rhs);
1372 case convolve(fcNormal, fcInfinity):
1373 case convolve(fcInfinity, fcNormal):
1374 case convolve(fcInfinity, fcInfinity):
1375 category = fcInfinity;
1378 case convolve(fcZero, fcNormal):
1379 case convolve(fcNormal, fcZero):
1380 case convolve(fcZero, fcZero):
1384 case convolve(fcZero, fcInfinity):
1385 case convolve(fcInfinity, fcZero):
1389 case convolve(fcNormal, fcNormal):
1395 APFloat::divideSpecials(const APFloat &rhs)
1397 switch (convolve(category, rhs.category)) {
1399 llvm_unreachable(0);
1401 case convolve(fcNaN, fcZero):
1402 case convolve(fcNaN, fcNormal):
1403 case convolve(fcNaN, fcInfinity):
1404 case convolve(fcNaN, fcNaN):
1405 case convolve(fcInfinity, fcZero):
1406 case convolve(fcInfinity, fcNormal):
1407 case convolve(fcZero, fcInfinity):
1408 case convolve(fcZero, fcNormal):
1411 case convolve(fcZero, fcNaN):
1412 case convolve(fcNormal, fcNaN):
1413 case convolve(fcInfinity, fcNaN):
1415 copySignificand(rhs);
1418 case convolve(fcNormal, fcInfinity):
1422 case convolve(fcNormal, fcZero):
1423 category = fcInfinity;
1426 case convolve(fcInfinity, fcInfinity):
1427 case convolve(fcZero, fcZero):
1431 case convolve(fcNormal, fcNormal):
1437 APFloat::modSpecials(const APFloat &rhs)
1439 switch (convolve(category, rhs.category)) {
1441 llvm_unreachable(0);
1443 case convolve(fcNaN, fcZero):
1444 case convolve(fcNaN, fcNormal):
1445 case convolve(fcNaN, fcInfinity):
1446 case convolve(fcNaN, fcNaN):
1447 case convolve(fcZero, fcInfinity):
1448 case convolve(fcZero, fcNormal):
1449 case convolve(fcNormal, fcInfinity):
1452 case convolve(fcZero, fcNaN):
1453 case convolve(fcNormal, fcNaN):
1454 case convolve(fcInfinity, fcNaN):
1456 copySignificand(rhs);
1459 case convolve(fcNormal, fcZero):
1460 case convolve(fcInfinity, fcZero):
1461 case convolve(fcInfinity, fcNormal):
1462 case convolve(fcInfinity, fcInfinity):
1463 case convolve(fcZero, fcZero):
1467 case convolve(fcNormal, fcNormal):
// NOTE(review): no statement of this member appears on the lines here; going
// by the name it presumably inverts the sign flag - confirm against the body.
1474 APFloat::changeSign()
1476 /* Look mummy, this one's easy. */
// NOTE(review): no statement of this member appears on the lines here; going
// by the name it presumably resets the sign flag - confirm against the body.
1481 APFloat::clearSign()
1483 /* So is this one. */
// NOTE(review): no statement of this member appears on the lines here; going
// by the name it presumably takes over the sign of rhs - confirm.
1488 APFloat::copySign(const APFloat &rhs)
1494 /* Normalized addition or subtraction. */
// Common worker for add() and subtract(), which pass false and true
// respectively as the final argument (used below under the name subtract).
// The category-only cases are tried first through addOrSubtractSpecials();
// the significands are combined and the result normalized with
// rounding_mode only when that call reports opDivByZero.
1496 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1501 assertArithmeticOK(*semantics);
1503 fs = addOrSubtractSpecials(rhs, subtract);
1505 /* This return code means it was not a simple case. */
1506 if(fs == opDivByZero) {
1507 lostFraction lost_fraction;
1509 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1510 fs = normalize(rounding_mode, lost_fraction);
1512 /* Can only be zero if we lost no fraction. */
1513 assert(category != fcZero || lost_fraction == lfExactlyZero);
1516 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1517 positive zero unless rounding to minus infinity, except that
1518 adding two like-signed zeroes gives that zero. */
1519 if(category == fcZero) {
// The sign is left alone only when rhs is a zero whose effective sign
// (taking subtract into account) agrees with ours; otherwise the zero is
// negative exactly when rounding toward negative infinity.
1520 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1521 sign = (rounding_mode == rmTowardNegative);
1527 /* Normalized addition. */
// Forwards to addOrSubtract() with the subtract flag set to false and
// returns its status.
1529 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1531 return addOrSubtract(rhs, rounding_mode, false);
1534 /* Normalized subtraction. */
// Forwards to addOrSubtract() with the subtract flag set to true and
// returns its status.
1536 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1538 return addOrSubtract(rhs, rounding_mode, true);
1541 /* Normalized multiply. */
// multiplySpecials() settles the category-only cases first. If the category
// is fcNormal afterwards, the significands are multiplied, the result is
// normalized with rounding_mode, and opInexact is OR-ed into the status
// whenever the multiplication lost a non-zero fraction.
1543 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1547 assertArithmeticOK(*semantics);
1549 fs = multiplySpecials(rhs);
1551 if(category == fcNormal) {
// No addend is supplied here (second argument 0); fusedMultiplyAdd() is the
// caller that passes one.
1552 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1553 fs = normalize(rounding_mode, lost_fraction);
1554 if(lost_fraction != lfExactlyZero)
1555 fs = (opStatus) (fs | opInexact);
1561 /* Normalized divide. */
// divideSpecials() settles the category-only cases first. If the category is
// fcNormal afterwards, the significands are divided, the result is
// normalized with rounding_mode, and opInexact is OR-ed into the status
// whenever the division lost a non-zero fraction.
1563 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1567 assertArithmeticOK(*semantics);
1569 fs = divideSpecials(rhs);
1571 if(category == fcNormal) {
1572 lostFraction lost_fraction = divideSignificand(rhs);
1573 fs = normalize(rounding_mode, lost_fraction);
1574 if(lost_fraction != lfExactlyZero)
1575 fs = (opStatus) (fs | opInexact);
1581 /* Normalized remainder. This is not currently correct in all cases. */
// Steps visible below: divide the working value V by rhs, round that
// quotient to an integer (nearest, ties to even), turn the integer back into
// a float, multiply it by rhs and subtract the product from this value;
// finally the sign saved in origSign is restored.
// NOTE(review): the declaration of V is not among the lines here -
// presumably it starts as a copy of this value; confirm.
1583 APFloat::remainder(const APFloat &rhs)
1587 unsigned int origSign = sign;
1589 assertArithmeticOK(*semantics);
1590 fs = V.divide(rhs, rmNearestTiesToEven);
1591 if (fs == opDivByZero)
// Scratch buffer for the integer quotient: one integerPart per significand
// part of this value.
1594 int parts = partCount();
1595 integerPart *x = new integerPart[parts];
1597 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1598 rmNearestTiesToEven, &ignored);
1599 if (fs==opInvalidOp)
1602 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1603 rmNearestTiesToEven);
1604 assert(fs==opOK); // should always work
1606 fs = V.multiply(rhs, rmNearestTiesToEven);
1607 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1609 fs = subtract(V, rmNearestTiesToEven);
1610 assert(fs==opOK || fs==opInexact); // likewise
1613 sign = origSign; // IEEE754 requires this
1618 /* Normalized llvm frem (C fmod).
1619 This is not currently correct in all cases. */
// modSpecials() runs first. Only when both operands are normal is the
// arithmetic below performed: the quotient of V and rhs is truncated toward
// zero to an integer, converted back to a float, multiplied by rhs using
// rounding_mode and subtracted from this value using rounding_mode; the sign
// saved in origSign is then restored.
// NOTE(review): the declaration of V is not among the lines here -
// presumably it starts as a copy of this value; confirm.
1621 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1624 assertArithmeticOK(*semantics);
1625 fs = modSpecials(rhs);
1627 if (category == fcNormal && rhs.category == fcNormal) {
1629 unsigned int origSign = sign;
1631 fs = V.divide(rhs, rmNearestTiesToEven);
1632 if (fs == opDivByZero)
// Scratch buffer for the integer quotient: one integerPart per significand
// part of this value.
1635 int parts = partCount();
1636 integerPart *x = new integerPart[parts];
// Truncation (rmTowardZero) is what distinguishes this from the
// nearest-even rounding used by remainder().
1638 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1639 rmTowardZero, &ignored);
1640 if (fs==opInvalidOp)
1643 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1644 rmNearestTiesToEven);
1645 assert(fs==opOK); // should always work
1647 fs = V.multiply(rhs, rounding_mode);
1648 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1650 fs = subtract(V, rounding_mode);
1651 assert(fs==opOK || fs==opInexact); // likewise
1654 sign = origSign; // IEEE754 requires this
1660 /* Normalized fused-multiply-add. */
// Multiplies this value by multiplicand and adds addend. Two paths are
// visible: when all three values are fcNormal, multiplySignificand() is
// handed the addend and the combined result is normalized once with
// rounding_mode; otherwise multiplySpecials() handles the product and the
// addend is folded in afterwards through addOrSubtract().
1662 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1663 const APFloat &addend,
1664 roundingMode rounding_mode)
1668 assertArithmeticOK(*semantics);
1670 /* Post-multiplication sign, before addition. */
1671 sign ^= multiplicand.sign;
1673 /* If and only if all arguments are normal do we need to do an
1674 extended-precision calculation. */
1675 if(category == fcNormal
1676 && multiplicand.category == fcNormal
1677 && addend.category == fcNormal) {
1678 lostFraction lost_fraction;
1680 lost_fraction = multiplySignificand(multiplicand, &addend);
1681 fs = normalize(rounding_mode, lost_fraction);
1682 if(lost_fraction != lfExactlyZero)
1683 fs = (opStatus) (fs | opInexact);
1685 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1686 positive zero unless rounding to minus infinity, except that
1687 adding two like-signed zeroes gives that zero. */
1688 if(category == fcZero && sign != addend.sign)
1689 sign = (rounding_mode == rmTowardNegative);
// At least one argument is not normal: resolve the product by category.
1691 fs = multiplySpecials(multiplicand);
1693 /* FS can only be opOK or opInvalidOp. There is no more work
1694 to do in the latter case. The IEEE-754R standard says it is
1695 implementation-defined in this case whether, if ADDEND is a
1696 quiet NaN, we raise invalid op; this implementation does so.
1698 If we need to do the addition we can do so with normal
1701 fs = addOrSubtract(addend, rounding_mode, false);
1707 /* Comparison requires normalized numbers. */
// Orders this value against rhs, which must use the same semantics
// (asserted). The switch key is convolve(category, rhs.category); every
// pairing that involves a NaN yields cmpUnordered. For two normal numbers
// the signs are examined first and, when they match, compareAbsoluteValue()
// decides, with the less / greater outcome swapped for negative values.
1709 APFloat::compare(const APFloat &rhs) const
1713 assertArithmeticOK(*semantics);
1714 assert(semantics == rhs.semantics);
1716 switch (convolve(category, rhs.category)) {
1718 llvm_unreachable(0);
// Any NaN operand makes the comparison unordered.
1720 case convolve(fcNaN, fcZero):
1721 case convolve(fcNaN, fcNormal):
1722 case convolve(fcNaN, fcInfinity):
1723 case convolve(fcNaN, fcNaN):
1724 case convolve(fcZero, fcNaN):
1725 case convolve(fcNormal, fcNaN):
1726 case convolve(fcInfinity, fcNaN):
1727 return cmpUnordered;
// This operand outranks rhs by category (infinity over finite, normal over
// zero).
1729 case convolve(fcInfinity, fcNormal):
1730 case convolve(fcInfinity, fcZero):
1731 case convolve(fcNormal, fcZero):
1735 return cmpGreaterThan;
// rhs outranks this operand by category.
1737 case convolve(fcNormal, fcInfinity):
1738 case convolve(fcZero, fcInfinity):
1739 case convolve(fcZero, fcNormal):
1741 return cmpGreaterThan;
// Two infinities: the signs decide.
1745 case convolve(fcInfinity, fcInfinity):
1746 if(sign == rhs.sign)
1751 return cmpGreaterThan;
1753 case convolve(fcZero, fcZero):
1756 case convolve(fcNormal, fcNormal):
1760 /* Two normal numbers. Do they have the same sign? */
1761 if(sign != rhs.sign) {
1763 result = cmpLessThan;
1765 result = cmpGreaterThan;
1767 /* Compare absolute values; invert result if negative. */
1768 result = compareAbsoluteValue(rhs);
1771 if(result == cmpLessThan)
1772 result = cmpGreaterThan;
1773 else if(result == cmpGreaterThan)
1774 result = cmpLessThan;
1781 /// APFloat::convert - convert a value of one floating point type to another.
1782 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1783 /// records whether the transformation lost information, i.e. whether
1784 /// converting the result back to the original type will produce the
1785 /// original value (this is almost the same as return value==opOK, but there
1786 /// are edge cases where this is not so).
/// toSemantics is the target format, rounding_mode is handed to normalize()
/// for normal values, and losesInfo receives the flag described above.
1789 APFloat::convert(const fltSemantics &toSemantics,
1790 roundingMode rounding_mode, bool *losesInfo)
1792 lostFraction lostFraction;
1793 unsigned int newPartCount, oldPartCount;
1796 assertArithmeticOK(*semantics);
1797 assertArithmeticOK(toSemantics);
1798 lostFraction = lfExactlyZero;
1799 newPartCount = partCountForBits(toSemantics.precision + 1);
1800 oldPartCount = partCount();
1802 /* Handle storage complications. If our new form is wider,
1803 re-allocate our bit pattern into wider storage. If it is
1804 narrower, we ignore the excess parts, but if narrowing to a
1805 single part we need to free the old storage.
1806 Be careful not to reference significandParts for zeroes
1807 and infinities, since it aborts. */
1808 if (newPartCount > oldPartCount) {
1809 integerPart *newParts;
1810 newParts = new integerPart[newPartCount];
1811 APInt::tcSet(newParts, 0, newPartCount);
1812 if (category==fcNormal || category==fcNaN)
1813 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1815 significand.parts = newParts;
1816 } else if (newPartCount < oldPartCount) {
1817 /* Capture any lost fraction through truncation of parts so we get
1818 correct rounding whilst normalizing. */
1819 if (category==fcNormal)
1820 lostFraction = lostFractionThroughTruncation
1821 (significandParts(), oldPartCount, toSemantics.precision);
1822 if (newPartCount == 1) {
1823 integerPart newPart = 0;
1824 if (category==fcNormal || category==fcNaN)
1825 newPart = significandParts()[0];
1827 significand.part = newPart;
// Normal values: rebase the exponent by the precision difference, switch to
// the new semantics, then let normalize() round; any status other than opOK
// is reported as lost information.
1831 if(category == fcNormal) {
1832 /* Re-interpret our bit-pattern. */
1833 exponent += toSemantics.precision - semantics->precision;
1834 semantics = &toSemantics;
1835 fs = normalize(rounding_mode, lostFraction);
1836 *losesInfo = (fs != opOK);
// NaNs: the payload is shifted by the precision difference instead of being
// normalized.
1837 } else if (category == fcNaN) {
1838 int shift = toSemantics.precision - semantics->precision;
1839 // Do this now so significandParts gets the right answer
1840 const fltSemantics *oldSemantics = semantics;
1841 semantics = &toSemantics;
1843 // No normalization here, just truncate
1845 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1846 else if (shift < 0) {
1847 unsigned ushift = -shift;
1848 // Figure out if we are losing information. This happens
1849 // if we are shifting out something other than 0s, or if the x87 long
1850 // double input did not have its integer bit set (pseudo-NaN), or if the
1851 // x87 long double input did not have its QNan bit set (because the x87
1852 // hardware sets this bit when converting a lower-precision NaN to
1853 // x87 long double).
1854 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1856 if (oldSemantics == &APFloat::x87DoubleExtended &&
1857 (!(*significandParts() & 0x8000000000000000ULL) ||
1858 !(*significandParts() & 0x4000000000000000ULL)))
1860 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1862 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1863 // does not give you back the same bits. This is dubious, and we
1864 // don't currently do it. You're really supposed to get
1865 // an invalid operation signal at runtime, but nobody does that.
1868 semantics = &toSemantics;
1876 /* Convert a floating point number to an integer according to the
1877 rounding mode. If the rounded integer value is out of range this
1878 returns an invalid operation exception and the contents of the
1879 destination parts are unspecified. If the rounded value is in
1880 range but the floating point number is not the exact integer, the C
1881 standard doesn't require an inexact exception to be raised. IEEE
1882 854 does require it so we do that.
1884 Note that for conversions to integer type the C standard requires
1885 round-to-zero to always be used. */
// parts receives the result as partCountForBits(width) integerParts, width
// is the destination size in bits, and rounding_mode decides whether a
// truncated fraction bumps the magnitude up by one.
1887 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1889 roundingMode rounding_mode,
1890 bool *isExact) const
1892 lostFraction lost_fraction;
1893 const integerPart *src;
1894 unsigned int dstPartsCount, truncatedBits;
1896 assertArithmeticOK(*semantics);
1900 /* Handle the three special cases first. */
1901 if(category == fcInfinity || category == fcNaN)
1904 dstPartsCount = partCountForBits(width);
1906 if(category == fcZero) {
1907 APInt::tcSet(parts, 0, dstPartsCount);
1908 // Negative zero can't be represented as an int.
1913 src = significandParts();
1915 /* Step 1: place our absolute value, with any fraction truncated, in
1918 /* Our absolute value is less than one; truncate everything. */
1919 APInt::tcSet(parts, 0, dstPartsCount);
1920 /* For exponent -1 the integer bit represents .5, look at that.
1921 For smaller exponents leftmost truncated bit is 0. */
1922 truncatedBits = semantics->precision -1U - exponent;
1924 /* We want the most significant (exponent + 1) bits; the rest are
1926 unsigned int bits = exponent + 1U;
1928 /* Hopelessly large in magnitude? */
1932 if (bits < semantics->precision) {
1933 /* We truncate (semantics->precision - bits) bits. */
1934 truncatedBits = semantics->precision - bits;
1935 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1937 /* We want at least as many bits as are available. */
1938 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1939 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1944 /* Step 2: work out any lost fraction, and increment the absolute
1945 value if we would round away from zero. */
1946 if (truncatedBits) {
1947 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1949 if (lost_fraction != lfExactlyZero
1950 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1951 if (APInt::tcIncrement(parts, dstPartsCount))
1952 return opInvalidOp; /* Overflow. */
1955 lost_fraction = lfExactlyZero;
1958 /* Step 3: check if we fit in the destination. */
1959 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1963 /* Negative numbers cannot be represented as unsigned. */
1967 /* It takes omsb bits to represent the unsigned integer value.
1968 We lose a bit for the sign, but care is needed as the
1969 maximally negative integer is a special case. */
1970 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1973 /* This case can happen because of rounding. */
1978 APInt::tcNegate (parts, dstPartsCount);
// An unsigned destination has one more magnitude bit than a signed one,
// hence the threshold of width + !isSigned.
1980 if (omsb >= width + !isSigned)
1984 if (lost_fraction == lfExactlyZero) {
1991 /* Same as convertToSignExtendedInteger, except we provide
1992 deterministic values in case of an invalid operation exception,
1993 namely zero for NaNs and the minimal or maximal value respectively
1994 for underflow or overflow.
1995 The *isExact output tells whether the result is exact, in the sense
1996 that converting it back to the original floating point type produces
1997 the original value. This is almost equivalent to result==opOK,
1998 except for negative zeroes.
2001 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2003 roundingMode rounding_mode, bool *isExact) const
2007 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
// On opInvalidOp a replacement value is built in parts:
// tcSetLeastSignificantBits() sets the low bits, and for a negative signed
// conversion that pattern is then shifted left by width - 1.
2010 if (fs == opInvalidOp) {
2011 unsigned int bits, dstPartsCount;
2013 dstPartsCount = partCountForBits(width);
2015 if (category == fcNaN)
2020 bits = width - isSigned;
2022 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2023 if (sign && isSigned)
2024 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2030 /* Convert an unsigned integer SRC to a floating point number,
2031 rounding according to ROUNDING_MODE. The sign of the floating
2032 point number is not modified. */
// src and srcCount describe the integer as an array of integerParts. The
// category is set to fcNormal up front and the status returned is whatever
// normalize() reports.
2034 APFloat::convertFromUnsignedParts(const integerPart *src,
2035 unsigned int srcCount,
2036 roundingMode rounding_mode)
2038 unsigned int omsb, precision, dstCount;
2040 lostFraction lost_fraction;
2042 assertArithmeticOK(*semantics);
2043 category = fcNormal;
// omsb is the one-based position of the highest set bit of src.
2044 omsb = APInt::tcMSB(src, srcCount) + 1;
2045 dst = significandParts();
2046 dstCount = partCount();
2047 precision = semantics->precision;
2049 /* We want the most significant PRECISION bits of SRC. There may not
2050 be that many; extract what we can. */
2051 if (precision <= omsb) {
2052 exponent = omsb - 1;
2053 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2055 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
// Fewer significant bits than the precision: all of them are extracted and
// nothing is lost.
2057 exponent = precision - 1;
2058 lost_fraction = lfExactlyZero;
2059 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2062 return normalize(rounding_mode, lost_fraction);
// Converts the integer held in Val, rounding with rounding_mode. The value
// named api (declared on a line not shown here) is tested with isNegative()
// when isSigned is set, and its raw words, Val.getNumWords() of them, are
// what convertFromUnsignedParts() receives.
2066 APFloat::convertFromAPInt(const APInt &Val,
2068 roundingMode rounding_mode)
2070 unsigned int partCount = Val.getNumWords();
2074 if (isSigned && api.isNegative()) {
2079 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2082 /* Convert a two's complement integer SRC to a floating point number,
2083 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2084 integer is signed, in which case it must be sign-extended. */
2086 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2087 unsigned int srcCount,
2089 roundingMode rounding_mode)
2093 assertArithmeticOK(*semantics);
// The topmost bit of the most significant part serves as the sign bit of the
// sign-extended input.
2095 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2098 /* If we're signed and negative negate a copy. */
2100 copy = new integerPart[srcCount];
2101 APInt::tcAssign(copy, src, srcCount);
2102 APInt::tcNegate(copy, srcCount);
2103 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
// Otherwise the parts are converted exactly as supplied.
2107 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2113 /* FIXME: should this just take a const APInt reference? */
// Wraps the width-bit integer stored in parts in an APInt and converts it
// through convertFromUnsignedParts() with rounding_mode. When isSigned is
// set, bit width - 1 of parts is examined as the sign bit.
2115 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2116 unsigned int width, bool isSigned,
2117 roundingMode rounding_mode)
2119 unsigned int partCount = partCountForBits(width);
2120 APInt api = APInt(width, partCount, parts);
2123 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2128 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
// Parses the hexadecimal significand and the p exponent found in s and
// rounds the value into this object with rounding_mode. The caller,
// convertFromString(), passes the text that follows any minus sign and the
// two-character hex prefix.
2132 APFloat::convertFromHexadecimalString(const StringRef &s,
2133 roundingMode rounding_mode)
2135 lostFraction lost_fraction = lfExactlyZero;
2136 integerPart *significand;
2137 unsigned int bitPos, partsCount;
2138 StringRef::iterator dot, firstSignificantDigit;
2142 category = fcNormal;
// Digits are written from the top of the significand storage downwards, so
// bitPos starts at the total bit count.
2144 significand = significandParts();
2145 partsCount = partCount();
2146 bitPos = partsCount * integerPartWidth;
2148 /* Skip leading zeroes and any (hexa)decimal point. */
2149 StringRef::iterator p = skipLeadingZeroesAndAnyDot(s.begin(), s.end(), &dot);
2150 firstSignificantDigit = p;
2152 for(; p != s.end();) {
2153 integerPart hex_value;
2160 hex_value = hexDigitValue(*p);
2161 if(hex_value == -1U) {
2170 /* Store the number whilst 4-bit nibbles remain. */
2173 hex_value <<= bitPos % integerPartWidth;
2174 significand[bitPos / integerPartWidth] |= hex_value;
// Digits that no longer fit are summarized as a lost fraction and skipped.
2176 lost_fraction = trailingHexadecimalFraction(p, s.end(), hex_value);
2177 while(p != s.end() && hexDigitValue(*p) != -1U)
2184 /* Hex floats require an exponent but not a hexadecimal point. */
2185 assert(p != s.end() && (*p == 'p' || *p == 'P') &&
2186 "Hex strings require an exponent");
2188 /* Ignore the exponent if we are zero. */
2189 if(p != firstSignificantDigit) {
2192 /* Implicit hexadecimal point? */
2196 /* Calculate the exponent adjustment implicit in the number of
2197 significant digits. */
2198 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2199 if(expAdjustment < 0)
// Each hexadecimal digit contributes four bits to the exponent.
2201 expAdjustment = expAdjustment * 4 - 1;
2203 /* Adjust for writing the significand starting at the most
2204 significant nibble. */
2205 expAdjustment += semantics->precision;
2206 expAdjustment -= partsCount * integerPartWidth;
2208 /* Adjust for the given exponent. */
2209 exponent = totalExponent(p, s.end(), expAdjustment);
2212 return normalize(rounding_mode, lost_fraction);
// Combines the integer significand decSigParts (sigPartCount parts) with the
// decimal exponent exp and rounds the product into this object using
// rounding_mode. The work is done in a wider scratch format, calcSemantics,
// whose part count doubles on every pass of the loop until the error bound
// shows that truncating will round correctly.
2216 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2217 unsigned sigPartCount, int exp,
2218 roundingMode rounding_mode)
2220 unsigned int parts, pow5PartCount;
2221 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2222 integerPart pow5Parts[maxPowerOfFiveParts];
2225 isNearest = (rounding_mode == rmNearestTiesToEven
2226 || rounding_mode == rmNearestTiesToAway);
2228 parts = partCountForBits(semantics->precision + 11);
2230 /* Calculate pow(5, abs(exp)). */
2231 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2233 for (;; parts *= 2) {
2234 opStatus sigStatus, powStatus;
2235 unsigned int excessPrecision, truncatedBits;
// Bits carried by the scratch format beyond the target precision.
2237 calcSemantics.precision = parts * integerPartWidth - 1;
2238 excessPrecision = calcSemantics.precision - semantics->precision;
2239 truncatedBits = excessPrecision;
2241 APFloat decSig(calcSemantics, fcZero, sign);
2242 APFloat pow5(calcSemantics, fcZero, false);
2244 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2245 rmNearestTiesToEven);
2246 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2247 rmNearestTiesToEven);
2248 /* Add exp, as 10^n = 5^n * 2^n. */
2249 decSig.exponent += exp;
2251 lostFraction calcLostFraction;
2252 integerPart HUerr, HUdistance;
2253 unsigned int powHUerr;
2256 /* multiplySignificand leaves the precision-th bit set to 1. */
2257 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2258 powHUerr = powStatus != opOK;
2260 calcLostFraction = decSig.divideSignificand(pow5);
2261 /* Denormal numbers have less precision. */
2262 if (decSig.exponent < semantics->minExponent) {
2263 excessPrecision += (semantics->minExponent - decSig.exponent);
2264 truncatedBits = excessPrecision;
2265 if (excessPrecision > calcSemantics.precision)
2266 excessPrecision = calcSemantics.precision;
2268 /* Extra half-ulp lost in reciprocal of exponent. */
2269 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2272 /* Both multiplySignificand and divideSignificand return the
2273 result with the integer bit set. */
2274 assert (APInt::tcExtractBit
2275 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2277 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2279 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2280 excessPrecision, isNearest);
2282 /* Are we guaranteed to round correctly if we truncate? */
2283 if (HUdistance >= HUerr) {
2284 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2285 calcSemantics.precision - excessPrecision,
2287 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2288 above we must adjust our exponent to compensate for the
2289 implicit right shift. */
2290 exponent = (decSig.exponent + semantics->precision
2291 - (calcSemantics.precision - excessPrecision));
2292 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2295 return normalize(rounding_mode, calcLostFraction);
// Parses the decimal text in str and stores the rounded value in this
// object. Inputs with no significant digits, and exponents that certainly
// underflow or overflow, are dispatched early; everything else is
// accumulated into a bignum and finished by roundSignificandWithExponent().
2301 APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode)
2306 /* Scan the text. */
2307 StringRef::iterator p = str.begin();
2308 interpretDecimal(p, str.end(), &D);
2310 /* Handle the quick cases. First the case of no significant digits,
2311 i.e. zero, and then exponents that are obviously too large or too
2312 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2313 definitely overflows if
2315 (exp - 1) * L >= maxExponent
2317 and definitely underflows to zero where
2319 (exp + 1) * L <= minExponent - precision
2321 With integer arithmetic the tightest bounds for L are
2323 93/28 < L < 196/59 [ numerator <= 256 ]
2324 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2327 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2330 } else if ((D.normalizedExponent + 1) * 28738
2331 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2332 /* Underflow to zero and round. */
2334 fs = normalize(rounding_mode, lfLessThanHalf);
2335 } else if ((D.normalizedExponent - 1) * 42039
2336 >= 12655 * semantics->maxExponent) {
2337 /* Overflow and round. */
2338 fs = handleOverflow(rounding_mode);
2340 integerPart *decSignificand;
2341 unsigned int partCount;
2343 /* A tight upper bound on number of bits required to hold an
2344 N-digit decimal integer is N * 196 / 59. Allocate enough space
2345 to hold the full significand, and an extra part required by
2347 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2348 partCount = partCountForBits(1 + 196 * partCount / 59);
2349 decSignificand = new integerPart[partCount + 1];
2352 /* Convert to binary efficiently - we do almost all multiplication
2353 in an integerPart. When this would overflow we do a single
2354 bignum multiplication, and then revert again to multiplication
2355 in an integerPart. */
2357 integerPart decValue, val, multiplier;
2365 if (p == str.end()) {
2369 decValue = decDigitValue(*p++);
2370 assert(decValue < 10U && "Invalid character in digit string");
2372 val = val * 10 + decValue;
2373 /* The maximum number that can be multiplied by ten with any
2374 digit added without overflowing an integerPart. */
2375 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2377 /* Multiply out the current part. */
2378 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2379 partCount, partCount + 1, false);
2381 /* If we used another part (likely but not guaranteed), increase
2383 if (decSignificand[partCount])
2385 } while (p <= D.lastSigDigit);
2387 category = fcNormal;
2388 fs = roundSignificandWithExponent(decSignificand, partCount,
2389 D.exponent, rounding_mode);
2391 delete [] decSignificand;
// Entry point for text input. str must be non-empty (asserted). A leading
// minus sign is recognised; a 0x or 0X prefix routes the remaining text to
// the hexadecimal parser, and anything else goes to the decimal parser.
2398 APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
2400 assertArithmeticOK(*semantics);
2401 assert(!str.empty() && "Invalid string length");
2403 /* Handle a leading minus sign. */
2404 StringRef::iterator p = str.begin();
2405 size_t slen = str.size();
2406 unsigned isNegative = str.front() == '-';
2411 assert(slen && "String is only a minus!");
2416 if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2417 assert(slen - 2 && "Invalid string");
// The substring starts after the optional sign and the two prefix characters.
2418 return convertFromHexadecimalString(str.substr(isNegative + 2),
2422 return convertFromDecimalString(str.substr(isNegative), rounding_mode);
2425 /* Write out a hexadecimal representation of the floating point value
2426 to DST, which must be of sufficient size, in the C99 form
2427 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2428 excluding the terminating NUL.
2430 If UPPERCASE, the output is in upper case, otherwise in lower case.
2432 HEXDIGITS digits appear altogether, rounding the value if
2433 necessary. If HEXDIGITS is 0, the minimal precision to display the
2434 number precisely is used instead. If nothing would appear after
2435 the decimal point it is suppressed.
2437 The decimal exponent is always printed and has at least one digit.
2438 Zero values display an exponent of zero. Infinities and NaNs
2439 appear as "infinity" or "nan" respectively.
2441 The above rules are as specified by C99. There is ambiguity about
2442 what the leading hexadecimal digit should be. This implementation
2443 uses whatever is necessary so that the exponent is displayed as
2444 stored. This implies the exponent will fall within the IEEE format
2445 range, and the leading hexadecimal digit will be 0 (for denormals),
2446 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2447 any other digits zero).
2450 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2451 bool upperCase, roundingMode rounding_mode) const
2455 assertArithmeticOK(*semantics);
// Infinity and NaN are emitted as fixed words, copied without their
// terminating NUL, in the requested letter case.
2463 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2464 dst += sizeof infinityL - 1;
2468 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2469 dst += sizeof NaNU - 1;
2474 *dst++ = upperCase ? 'X': 'x';
// When more than one digit was requested, hexDigits - 1 zero characters are
// written here.
2476 if (hexDigits > 1) {
2478 memset (dst, '0', hexDigits - 1);
2479 dst += hexDigits - 1;
2481 *dst++ = upperCase ? 'P': 'p';
// Normal values are delegated to the helper that handles rounding.
2486 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
// The character count is the distance dst advanced from the saved start p.
2492 return static_cast<unsigned int>(dst - p);
2495 /* Does the hard work of outputting the correctly rounded hexadecimal
2496 form of a normal floating point number with the specified number of
2497 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2498 digits necessary to print the value precisely is output. */
// dst is the output cursor, upperCase selects the digit table and marker
// letters, and rounding_mode is consulted only when digits are dropped. The
// value returned is the result of writeSignedDecimal() after the exponent
// has been appended.
2500 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2502 roundingMode rounding_mode) const
2504 unsigned int count, valueBits, shift, partsCount, outputDigits;
2505 const char *hexDigitChars;
2506 const integerPart *significand;
2511 *dst++ = upperCase ? 'X': 'x';
2514 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2516 significand = significandParts();
2517 partsCount = partCount();
2519 /* +3 because the first digit only uses the single integer bit, so
2520 we have 3 virtual zero most-significant-bits. */
2521 valueBits = semantics->precision + 3;
2522 shift = integerPartWidth - valueBits % integerPartWidth;
2524 /* The natural number of digits required ignoring trailing
2525 insignificant zeroes. */
2526 outputDigits = (valueBits - significandLSB () + 3) / 4;
2528 /* hexDigits of zero means use the required number for the
2529 precision. Otherwise, see if we are truncating. If we are,
2530 find out if we need to round away from zero. */
2532 if (hexDigits < outputDigits) {
2533 /* We are dropping non-zero bits, so need to check how to round.
2534 "bits" is the number of dropped bits. */
2536 lostFraction fraction;
2538 bits = valueBits - hexDigits * 4;
2539 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2540 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2542 outputDigits = hexDigits;
2545 /* Write the digits consecutively, and start writing in the location
2546 of the hexadecimal point. We move the most significant digit
2547 left and add the hexadecimal point later. */
// Number of integerParts needed to hold valueBits bits, rounded up.
2550 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2552 while (outputDigits && count) {
2555 /* Put the most significant integerPartWidth bits in "part". */
2556 if (--count == partsCount)
2557 part = 0; /* An imaginary higher zero part. */
2559 part = significand[count] << shift;
2562 part |= significand[count - 1] >> (integerPartWidth - shift);
2564 /* Convert as much of "part" to hexdigits as we can. */
2565 unsigned int curDigits = integerPartWidth / 4;
2567 if (curDigits > outputDigits)
2568 curDigits = outputDigits;
2569 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2570 outputDigits -= curDigits;
2576 /* Note that hexDigitChars has a trailing '0'. */
// Each digit is replaced by its successor in the table; the loop continues
// while the digit just written is the zero character.
2579 *q = hexDigitChars[hexDigitValue (*q) + 1];
2580 } while (*q == '0');
2583 /* Add trailing zeroes. */
2584 memset (dst, '0', outputDigits);
2585 dst += outputDigits;
2588 /* Move the most significant digit to before the point, and if there
2589 is something after the decimal point add it. This must come
2590 after rounding above. */
2597 /* Finally output the exponent. */
2598 *dst++ = upperCase ? 'P': 'p';
2600 return writeSignedDecimal (dst, exponent);
2603 // For good performance it is desirable for different APFloats
2604 // to produce different integers.
// Zero, infinity and NaN hash to the precision combined with a per-category
// bit pattern; the NaN hash uses neither the sign nor the significand.
// Normal values additionally mix in the exponent and both 32-bit halves of
// every significand part.
2606 APFloat::getHashValue() const
2608 if (category==fcZero) return sign<<8 | semantics->precision ;
2609 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2610 else if (category==fcNaN) return 1<<10 | semantics->precision;
2612 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2613 const integerPart* p = significandParts();
// Fold the low and high 32 bits of each part into the hash.
2614 for (int i=partCount(); i>0; i--, p++)
2615 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2620 // Conversion from APFloat to/from host float/double. It may eventually be
2621 // possible to eliminate these and have everybody deal with APFloats, but that
2622 // will take a while. This approach will not easily extend to long double.
2623 // Current implementation requires integerPartWidth==64, which is correct at
2624 // the moment but could be made more general.
2626 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2627 // the actual IEEE representations. We compensate for that here.
// Packs an x87 double-extended value into an 80-bit APInt built from two
// 64-bit words: word 0 carries the significand, word 1 the 15-bit biased
// exponent with the sign at bit 15.
2630 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2632 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2633 assert (partCount()==2);
2635 uint64_t myexponent, mysignificand;
2637 if (category==fcNormal) {
2638 myexponent = exponent+16383; //bias
2639 mysignificand = significandParts()[0];
// A biased exponent of 1 with the top significand bit clear is stored with
// an exponent field of 0.
2640 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2641 myexponent = 0; // denormal
2642 } else if (category==fcZero) {
2645 } else if (category==fcInfinity) {
// Infinity: all-ones exponent field, only the top significand bit set.
2646 myexponent = 0x7fff;
2647 mysignificand = 0x8000000000000000ULL;
2649 assert(category == fcNaN && "Unknown category");
// NaN: all-ones exponent field, significand bits kept as stored.
2650 myexponent = 0x7fff;
2651 mysignificand = significandParts()[0];
2655 words[0] = mysignificand;
2656 words[1] = ((uint64_t)(sign & 1) << 15) |
2657 (myexponent & 0x7fffLL);
2658 return APInt(80, 2, words);
// Build the 128-bit PPC double-double bit pattern of this value.
// Each of the two result words is laid out as sign in bit 63, 11-bit
// exponent field in bits 52-62 and the low 52 significand bits; word 0 uses
// sign/exponent/significand part 0, word 1 uses sign2/exponent2/part 1.
2662 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2664 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2665 assert (partCount()==2);
2667 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2669 if (category==fcNormal) {
2670 myexponent = exponent + 1023; //bias
2671 myexponent2 = exponent2 + 1023;
2672 mysignificand = significandParts()[0];
2673 mysignificand2 = significandParts()[1];
// For each half: a biased exponent of 1 with bit 52 (the integer bit) clear
// is a denormal, which is encoded with an exponent field of 0.
2674 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2675 myexponent = 0; // denormal
2676 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2677 myexponent2 = 0; // denormal
2678 } else if (category==fcZero) {
2683 } else if (category==fcInfinity) {
2689 assert(category == fcNaN && "Unknown category");
// NaN: both significand parts are copied; exponent2 is used as stored,
// without adding the bias.
2691 mysignificand = significandParts()[0];
2692 myexponent2 = exponent2;
2693 mysignificand2 = significandParts()[1];
// Pack each half; the masks drop the integer bit from the significands.
2697 words[0] = ((uint64_t)(sign & 1) << 63) |
2698 ((myexponent & 0x7ff) << 52) |
2699 (mysignificand & 0xfffffffffffffLL);
2700 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2701 ((myexponent2 & 0x7ff) << 52) |
2702 (mysignificand2 & 0xfffffffffffffLL);
2703 return APInt(128, 2, words);
// Build the 64-bit IEEE double bit pattern of this value: sign in bit 63,
// 11-bit exponent field in bits 52-62, low 52 significand bits below that.
2707 APFloat::convertDoubleAPFloatToAPInt() const
2709 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2710 assert (partCount()==1);
2712 uint64_t myexponent, mysignificand;
2714 if (category==fcNormal) {
2715 myexponent = exponent+1023; //bias
2716 mysignificand = *significandParts();
// A biased exponent of 1 with bit 52 (the integer bit) clear is a denormal,
// which is encoded with an exponent field of 0.
2717 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2718 myexponent = 0; // denormal
2719 } else if (category==fcZero) {
2722 } else if (category==fcInfinity) {
2726 assert(category == fcNaN && "Unknown category!");
// NaN: the significand is copied from the stored value.
2728 mysignificand = *significandParts();
// Pack the fields; the mask drops the integer bit from the significand.
2731 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2732 ((myexponent & 0x7ff) << 52) |
2733 (mysignificand & 0xfffffffffffffLL))));
// Build the 32-bit IEEE single bit pattern of this value: sign in bit 31,
// 8-bit exponent field in bits 23-30, low 23 significand bits below that.
2737 APFloat::convertFloatAPFloatToAPInt() const
2739 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2740 assert (partCount()==1);
2742 uint32_t myexponent, mysignificand;
2744 if (category==fcNormal) {
2745 myexponent = exponent+127; //bias
2746 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with bit 23 (the integer bit) clear is a denormal,
// which is encoded with an exponent field of 0.
2747 if (myexponent == 1 && !(mysignificand & 0x800000))
2748 myexponent = 0; // denormal
2749 } else if (category==fcZero) {
2752 } else if (category==fcInfinity) {
2756 assert(category == fcNaN && "Unknown category!");
// NaN: the significand is copied from the stored value.
2758 mysignificand = (uint32_t)*significandParts();
// Pack the fields; the mask drops the integer bit from the significand.
2761 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2762 (mysignificand & 0x7fffff)));
2765 // This function creates an APInt that is just a bit map of the floating
2766 // point constant as it would appear in memory. It is not a conversion,
2767 // and treating the result as a normal integer is unlikely to be useful.
// The work is delegated to the format-specific converter selected by
// comparing the semantics pointer against each known format in turn.
2770 APFloat::bitcastToAPInt() const
2772 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2773 return convertFloatAPFloatToAPInt();
2775 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2776 return convertDoubleAPFloatToAPInt();
2778 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2779 return convertPPCDoubleDoubleAPFloatToAPInt();
// Anything that reaches this point must be x87 double-extended.
2781 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2783 return convertF80LongDoubleAPFloatToAPInt();
// Return this value as a host float. Only valid for IEEEsingle semantics
// (asserted); the result is read back from the bit pattern produced by
// bitcastToAPInt().
2787 APFloat::convertToFloat() const
2789 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && "Float semantics are not IEEEsingle");
2790 APInt api = bitcastToAPInt();
2791 return api.bitsToFloat();
// Return this value as a host double. Only valid for IEEEdouble semantics
// (asserted); the result is read back from the bit pattern produced by
// bitcastToAPInt().
2795 APFloat::convertToDouble() const
2797 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && "Float semantics are not IEEEdouble");
2798 APInt api = bitcastToAPInt();
2799 return api.bitsToDouble();
/// Initialize this APFloat from an 80-bit x87 double-extended bit pattern.
/// The first raw word of \p api is taken as the 64-bit significand; the
/// second supplies the 15-bit exponent field (bits 0-14) and the sign
/// (from bit 15 upward).
///
2802 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2803 /// does not support these bit patterns:
2804 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2805 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2806 /// exponent = 0, integer bit 1 ("pseudodenormal")
2807 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2808 /// At the moment, the first two are treated as NaNs, the last two as Normal.
2810 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2812 assert(api.getBitWidth()==80);
2813 uint64_t i1 = api.getRawData()[0];
2814 uint64_t i2 = api.getRawData()[1];
2815 uint64_t myexponent = (i2 & 0x7fff);
2816 uint64_t mysignificand = i1;
2818 initialize(&APFloat::x87DoubleExtended);
2819 assert(partCount()==2);
2821 sign = static_cast<unsigned int>(i2>>15);
// Classify by exponent field and significand: all-zero first, then the
// all-ones exponent cases, then everything else.
2822 if (myexponent==0 && mysignificand==0) {
2823 // exponent, significand meaningless
2825 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2826 // exponent, significand meaningless
2827 category = fcInfinity;
2828 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2829 // exponent meaningless
// The significand is preserved in part 0; part 1 is cleared.
2831 significandParts()[0] = mysignificand;
2832 significandParts()[1] = 0;
2834 category = fcNormal;
// Remove the bias to recover the unbiased exponent.
2835 exponent = myexponent - 16383;
2836 significandParts()[0] = mysignificand;
2837 significandParts()[1] = 0;
2838 if (myexponent==0) // denormal
// Initialize this APFloat from a 128-bit PPC double-double bit pattern.
// Each raw 64-bit word of api is split the same way: sign from bit 63,
// 11-bit exponent field from bits 52-62, significand from the low 52 bits.
// Word 0 feeds sign/exponent/part 0, word 1 feeds sign2/exponent2/part 1.
2844 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2846 assert(api.getBitWidth()==128);
2847 uint64_t i1 = api.getRawData()[0];
2848 uint64_t i2 = api.getRawData()[1];
2849 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2850 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2851 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2852 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2854 initialize(&APFloat::PPCDoubleDouble);
2855 assert(partCount()==2);
2857 sign = static_cast<unsigned int>(i1>>63);
2858 sign2 = static_cast<unsigned int>(i2>>63);
// Classification looks only at the first word's exponent and significand.
2859 if (myexponent==0 && mysignificand==0) {
2860 // exponent, significand meaningless
2861 // exponent2 and significand2 are required to be 0; we don't check
2863 } else if (myexponent==0x7ff && mysignificand==0) {
2864 // exponent, significand meaningless
2865 // exponent2 and significand2 are required to be 0; we don't check
2866 category = fcInfinity;
2867 } else if (myexponent==0x7ff && mysignificand!=0) {
2868 // exponent meaningless. So is the whole second word, but keep it
// exponent2 is stored as the raw (still biased) field in this branch.
2871 exponent2 = myexponent2;
2872 significandParts()[0] = mysignificand;
2873 significandParts()[1] = mysignificand2;
2875 category = fcNormal;
2876 // Note there is no category2; the second word is treated as if it is
2877 // fcNormal, although it might be something else considered by itself.
// Remove the bias from both exponent fields.
2878 exponent = myexponent - 1023;
2879 exponent2 = myexponent2 - 1023;
2880 significandParts()[0] = mysignificand;
2881 significandParts()[1] = mysignificand2;
2882 if (myexponent==0) // denormal
2885 significandParts()[0] |= 0x10000000000000LL; // integer bit
2889 significandParts()[1] |= 0x10000000000000LL; // integer bit
// Initialize this APFloat from a 64-bit IEEE double bit pattern: sign from
// bit 63, 11-bit exponent field from bits 52-62, significand from the low
// 52 bits.
2894 APFloat::initFromDoubleAPInt(const APInt &api)
2896 assert(api.getBitWidth()==64);
2897 uint64_t i = *api.getRawData();
2898 uint64_t myexponent = (i >> 52) & 0x7ff;
2899 uint64_t mysignificand = i & 0xfffffffffffffLL;
2901 initialize(&APFloat::IEEEdouble);
2902 assert(partCount()==1);
2904 sign = static_cast<unsigned int>(i>>63);
// Classify by exponent field and significand: all-zero first, then the
// all-ones exponent cases, then everything else.
2905 if (myexponent==0 && mysignificand==0) {
2906 // exponent, significand meaningless
2908 } else if (myexponent==0x7ff && mysignificand==0) {
2909 // exponent, significand meaningless
2910 category = fcInfinity;
2911 } else if (myexponent==0x7ff && mysignificand!=0) {
2912 // exponent meaningless
2914 *significandParts() = mysignificand;
2916 category = fcNormal;
// Remove the bias to recover the unbiased exponent.
2917 exponent = myexponent - 1023;
2918 *significandParts() = mysignificand;
2919 if (myexponent==0) // denormal
2922 *significandParts() |= 0x10000000000000LL; // integer bit
// Initialize this APFloat from a 32-bit IEEE single bit pattern: 8-bit
// exponent field from bits 23-30, significand from the low 23 bits.
2927 APFloat::initFromFloatAPInt(const APInt & api)
2929 assert(api.getBitWidth()==32);
2930 uint32_t i = (uint32_t)*api.getRawData();
2931 uint32_t myexponent = (i >> 23) & 0xff;
2932 uint32_t mysignificand = i & 0x7fffff;
2934 initialize(&APFloat::IEEEsingle);
2935 assert(partCount()==1);
// Classify by exponent field and significand: all-zero first, then the
// all-ones exponent cases, then everything else.
2938 if (myexponent==0 && mysignificand==0) {
2939 // exponent, significand meaningless
2941 } else if (myexponent==0xff && mysignificand==0) {
2942 // exponent, significand meaningless
2943 category = fcInfinity;
2944 } else if (myexponent==0xff && mysignificand!=0) {
2945 // exponent meaningless; the significand is still stored below
2947 *significandParts() = mysignificand;
2949 category = fcNormal;
2950 exponent = myexponent - 127; //bias
2951 *significandParts() = mysignificand;
2952 if (myexponent==0) // denormal
2955 *significandParts() |= 0x800000; // integer bit
2959 /// Treat api as containing the bits of a floating point number. Currently
2960 /// we infer the floating point type from the size of the APInt. The
2961 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2962 /// when the size is anything else).
///
/// Widths dispatched here: 32 (single), 64 (double), 80 (x87 extended) and
/// 128 with isIEEE false (PPC double-double). No other combination is
/// matched by the chain below, which ends in llvm_unreachable.
2964 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2966 if (api.getBitWidth() == 32)
2967 return initFromFloatAPInt(api);
2968 else if (api.getBitWidth()==64)
2969 return initFromDoubleAPInt(api);
2970 else if (api.getBitWidth()==80)
2971 return initFromF80LongDoubleAPInt(api);
2972 else if (api.getBitWidth()==128 && !isIEEE)
2973 return initFromPPCDoubleDoubleAPInt(api);
2975 llvm_unreachable(0);
// Construct an APFloat from a raw bit pattern; api and isIEEE are forwarded
// unchanged to initFromAPInt, which picks the format from the bit width.
2978 APFloat::APFloat(const APInt& api, bool isIEEE)
2980 initFromAPInt(api, isIEEE);
// Construct an APFloat from a host float by passing the result of
// floatToBits on a 32-bit APInt to initFromAPInt (presumably the raw bit
// pattern of f, which selects the IEEE single path by width).
2983 APFloat::APFloat(float f)
2985 APInt api = APInt(32, 0);
2986 initFromAPInt(api.floatToBits(f));
// Construct an APFloat from a host double by passing the result of
// doubleToBits on a 64-bit APInt to initFromAPInt (presumably the raw bit
// pattern of d, which selects the IEEE double path by width).
2989 APFloat::APFloat(double d)
2991 APInt api = APInt(64, 0);
2992 initFromAPInt(api.doubleToBits(d));