1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Neil Booth and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
16 #include "llvm/ADT/APFloat.h"
17 #include "llvm/Support/MathExtras.h"
21 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
23 /* Assumed in hexadecimal significand parsing. */
24 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
28 /* Represents floating point arithmetic semantics. */
30 /* The largest E such that 2^E is representable; this matches the
31 definition of IEEE 754. */
32 exponent_t maxExponent;
34 /* The smallest E such that 2^E is a normalized number; this
35 matches the definition of IEEE 754. */
36 exponent_t minExponent;
38 /* Number of bits in the significand. This includes the integer
40 unsigned char precision;
42 /* If the target format has an implicit integer bit. */
43 bool implicitIntegerBit;
46 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
47 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
48 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
49 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, false };
50 const fltSemantics APFloat::Bogus = { 0, 0, 0, false };
53 /* Put a bunch of private, handy routines in an anonymous namespace. */
57 partCountForBits(unsigned int bits)
59 return ((bits) + integerPartWidth - 1) / integerPartWidth;
63 digitValue(unsigned int c)
75 hexDigitValue (unsigned int c)
94 /* This is ugly and needs cleaning up, but I don't immediately see
95 how whilst remaining safe. */
97 totalExponent(const char *p, int exponentAdjustment)
99 integerPart unsignedExponent;
100 bool negative, overflow;
103 /* Move past the exponent letter and sign to the digits. */
105 negative = *p == '-';
106 if(*p == '-' || *p == '+')
109 unsignedExponent = 0;
114 value = digitValue(*p);
119 unsignedExponent = unsignedExponent * 10 + value;
120 if(unsignedExponent > 65535)
124 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
128 exponent = unsignedExponent;
130 exponent = -exponent;
131 exponent += exponentAdjustment;
132 if(exponent > 65535 || exponent < -65536)
137 exponent = negative ? -65536: 65535;
143 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
158 /* Return the trailing fraction of a hexadecimal number.
159 DIGITVALUE is the first hex digit of the fraction, P points to
162 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
164 unsigned int hexDigit;
166 /* If the first trailing digit isn't 0 or 8 we can work out the
167 fraction immediately. */
169 return lfMoreThanHalf;
170 else if(digitValue < 8 && digitValue > 0)
171 return lfLessThanHalf;
173 /* Otherwise we need to find the first non-zero digit. */
177 hexDigit = hexDigitValue(*p);
179 /* If we ran off the end it is exactly zero or one-half, otherwise
182 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
184 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
187 /* Return the fraction lost were a bignum truncated losing the least
188 significant BITS bits. */
190 lostFractionThroughTruncation(integerPart *parts,
191 unsigned int partCount,
196 lsb = APInt::tcLSB(parts, partCount);
198 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
200 return lfExactlyZero;
202 return lfExactlyHalf;
203 if(bits <= partCount * integerPartWidth
204 && APInt::tcExtractBit(parts, bits - 1))
205 return lfMoreThanHalf;
207 return lfLessThanHalf;
210 /* Shift DST right BITS bits noting lost fraction. */
212 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
214 lostFraction lost_fraction;
216 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
218 APInt::tcShiftRight(dst, parts, bits);
220 return lost_fraction;
226 APFloat::initialize(const fltSemantics *ourSemantics)
230 semantics = ourSemantics;
233 significand.parts = new integerPart[count];
237 APFloat::freeSignificand()
240 delete [] significand.parts;
244 APFloat::assign(const APFloat &rhs)
246 assert(semantics == rhs.semantics);
249 category = rhs.category;
250 exponent = rhs.exponent;
251 if(category == fcNormal || category == fcNaN)
252 copySignificand(rhs);
256 APFloat::copySignificand(const APFloat &rhs)
258 assert(category == fcNormal || category == fcNaN);
259 assert(rhs.partCount() >= partCount());
261 APInt::tcAssign(significandParts(), rhs.significandParts(),
266 APFloat::operator=(const APFloat &rhs)
269 if(semantics != rhs.semantics) {
271 initialize(rhs.semantics);
280 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
283 if (semantics != rhs.semantics ||
284 category != rhs.category ||
287 if (category==fcZero || category==fcInfinity)
289 else if (category==fcNormal && exponent!=rhs.exponent)
293 const integerPart* p=significandParts();
294 const integerPart* q=rhs.significandParts();
295 for (; i>0; i--, p++, q++) {
303 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
305 initialize(&ourSemantics);
308 exponent = ourSemantics.precision - 1;
309 significandParts()[0] = value;
310 normalize(rmNearestTiesToEven, lfExactlyZero);
313 APFloat::APFloat(const fltSemantics &ourSemantics,
314 fltCategory ourCategory, bool negative)
316 initialize(&ourSemantics);
317 category = ourCategory;
319 if(category == fcNormal)
323 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
325 initialize(&ourSemantics);
326 convertFromString(text, rmNearestTiesToEven);
329 APFloat::APFloat(const APFloat &rhs)
331 initialize(rhs.semantics);
341 APFloat::partCount() const
343 return partCountForBits(semantics->precision + 1);
347 APFloat::semanticsPrecision(const fltSemantics &semantics)
349 return semantics.precision;
353 APFloat::significandParts() const
355 return const_cast<APFloat *>(this)->significandParts();
359 APFloat::significandParts()
361 assert(category == fcNormal || category == fcNaN);
364 return significand.parts;
366 return &significand.part;
369 /* Combine the effect of two lost fractions. */
371 APFloat::combineLostFractions(lostFraction moreSignificant,
372 lostFraction lessSignificant)
374 if(lessSignificant != lfExactlyZero) {
375 if(moreSignificant == lfExactlyZero)
376 moreSignificant = lfLessThanHalf;
377 else if(moreSignificant == lfExactlyHalf)
378 moreSignificant = lfMoreThanHalf;
381 return moreSignificant;
385 APFloat::zeroSignificand()
388 APInt::tcSet(significandParts(), 0, partCount());
391 /* Increment an fcNormal floating point number's significand. */
393 APFloat::incrementSignificand()
397 carry = APInt::tcIncrement(significandParts(), partCount());
399 /* Our callers should never cause us to overflow. */
403 /* Add the significand of the RHS. Returns the carry flag. */
405 APFloat::addSignificand(const APFloat &rhs)
409 parts = significandParts();
411 assert(semantics == rhs.semantics);
412 assert(exponent == rhs.exponent);
414 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
417 /* Subtract the significand of the RHS with a borrow flag. Returns
420 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
424 parts = significandParts();
426 assert(semantics == rhs.semantics);
427 assert(exponent == rhs.exponent);
429 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
433 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
434 on to the full-precision result of the multiplication. Returns the
437 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
439 unsigned int omsb; // One, not zero, based MSB.
440 unsigned int partsCount, newPartsCount, precision;
441 integerPart *lhsSignificand;
442 integerPart scratch[4];
443 integerPart *fullSignificand;
444 lostFraction lost_fraction;
446 assert(semantics == rhs.semantics);
448 precision = semantics->precision;
449 newPartsCount = partCountForBits(precision * 2);
451 if(newPartsCount > 4)
452 fullSignificand = new integerPart[newPartsCount];
454 fullSignificand = scratch;
456 lhsSignificand = significandParts();
457 partsCount = partCount();
459 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
460 rhs.significandParts(), partsCount);
462 lost_fraction = lfExactlyZero;
463 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
464 exponent += rhs.exponent;
467 Significand savedSignificand = significand;
468 const fltSemantics *savedSemantics = semantics;
469 fltSemantics extendedSemantics;
471 unsigned int extendedPrecision;
473 /* Normalize our MSB. */
474 extendedPrecision = precision + precision - 1;
475 if(omsb != extendedPrecision)
477 APInt::tcShiftLeft(fullSignificand, newPartsCount,
478 extendedPrecision - omsb);
479 exponent -= extendedPrecision - omsb;
482 /* Create new semantics. */
483 extendedSemantics = *semantics;
484 extendedSemantics.precision = extendedPrecision;
486 if(newPartsCount == 1)
487 significand.part = fullSignificand[0];
489 significand.parts = fullSignificand;
490 semantics = &extendedSemantics;
492 APFloat extendedAddend(*addend);
493 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
494 assert(status == opOK);
495 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
497 /* Restore our state. */
498 if(newPartsCount == 1)
499 fullSignificand[0] = significand.part;
500 significand = savedSignificand;
501 semantics = savedSemantics;
503 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
506 exponent -= (precision - 1);
508 if(omsb > precision) {
509 unsigned int bits, significantParts;
512 bits = omsb - precision;
513 significantParts = partCountForBits(omsb);
514 lf = shiftRight(fullSignificand, significantParts, bits);
515 lost_fraction = combineLostFractions(lf, lost_fraction);
519 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
521 if(newPartsCount > 4)
522 delete [] fullSignificand;
524 return lost_fraction;
527 /* Multiply the significands of LHS and RHS to DST. */
529 APFloat::divideSignificand(const APFloat &rhs)
531 unsigned int bit, i, partsCount;
532 const integerPart *rhsSignificand;
533 integerPart *lhsSignificand, *dividend, *divisor;
534 integerPart scratch[4];
535 lostFraction lost_fraction;
537 assert(semantics == rhs.semantics);
539 lhsSignificand = significandParts();
540 rhsSignificand = rhs.significandParts();
541 partsCount = partCount();
544 dividend = new integerPart[partsCount * 2];
548 divisor = dividend + partsCount;
550 /* Copy the dividend and divisor as they will be modified in-place. */
551 for(i = 0; i < partsCount; i++) {
552 dividend[i] = lhsSignificand[i];
553 divisor[i] = rhsSignificand[i];
554 lhsSignificand[i] = 0;
557 exponent -= rhs.exponent;
559 unsigned int precision = semantics->precision;
561 /* Normalize the divisor. */
562 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
565 APInt::tcShiftLeft(divisor, partsCount, bit);
568 /* Normalize the dividend. */
569 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
572 APInt::tcShiftLeft(dividend, partsCount, bit);
575 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
577 APInt::tcShiftLeft(dividend, partsCount, 1);
578 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
582 for(bit = precision; bit; bit -= 1) {
583 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
584 APInt::tcSubtract(dividend, divisor, 0, partsCount);
585 APInt::tcSetBit(lhsSignificand, bit - 1);
588 APInt::tcShiftLeft(dividend, partsCount, 1);
591 /* Figure out the lost fraction. */
592 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
595 lost_fraction = lfMoreThanHalf;
597 lost_fraction = lfExactlyHalf;
598 else if(APInt::tcIsZero(dividend, partsCount))
599 lost_fraction = lfExactlyZero;
601 lost_fraction = lfLessThanHalf;
606 return lost_fraction;
610 APFloat::significandMSB() const
612 return APInt::tcMSB(significandParts(), partCount());
616 APFloat::significandLSB() const
618 return APInt::tcLSB(significandParts(), partCount());
621 /* Note that a zero result is NOT normalized to fcZero. */
623 APFloat::shiftSignificandRight(unsigned int bits)
625 /* Our exponent should not overflow. */
626 assert((exponent_t) (exponent + bits) >= exponent);
630 return shiftRight(significandParts(), partCount(), bits);
633 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
635 APFloat::shiftSignificandLeft(unsigned int bits)
637 assert(bits < semantics->precision);
640 unsigned int partsCount = partCount();
642 APInt::tcShiftLeft(significandParts(), partsCount, bits);
645 assert(!APInt::tcIsZero(significandParts(), partsCount));
650 APFloat::compareAbsoluteValue(const APFloat &rhs) const
654 assert(semantics == rhs.semantics);
655 assert(category == fcNormal);
656 assert(rhs.category == fcNormal);
658 compare = exponent - rhs.exponent;
660 /* If exponents are equal, do an unsigned bignum comparison of the
663 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
667 return cmpGreaterThan;
674 /* Handle overflow. Sign is preserved. We either become infinity or
675 the largest finite number. */
677 APFloat::handleOverflow(roundingMode rounding_mode)
680 if(rounding_mode == rmNearestTiesToEven
681 || rounding_mode == rmNearestTiesToAway
682 || (rounding_mode == rmTowardPositive && !sign)
683 || (rounding_mode == rmTowardNegative && sign))
685 category = fcInfinity;
686 return (opStatus) (opOverflow | opInexact);
689 /* Otherwise we become the largest finite number. */
691 exponent = semantics->maxExponent;
692 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
693 semantics->precision);
698 /* Returns TRUE if, when truncating the current number, with BIT the
699 new LSB, with the given lost fraction and rounding mode, the result
700 would need to be rounded away from zero (i.e., by increasing the
701 signficand). This routine must work for fcZero of both signs, and
704 APFloat::roundAwayFromZero(roundingMode rounding_mode,
705 lostFraction lost_fraction,
706 unsigned int bit) const
708 /* NaNs and infinities should not have lost fractions. */
709 assert(category == fcNormal || category == fcZero);
711 /* Current callers never pass this so we don't handle it. */
712 assert(lost_fraction != lfExactlyZero);
714 switch(rounding_mode) {
718 case rmNearestTiesToAway:
719 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
721 case rmNearestTiesToEven:
722 if(lost_fraction == lfMoreThanHalf)
725 /* Our zeroes don't have a significand to test. */
726 if(lost_fraction == lfExactlyHalf && category != fcZero)
727 return APInt::tcExtractBit(significandParts(), bit);
734 case rmTowardPositive:
735 return sign == false;
737 case rmTowardNegative:
743 APFloat::normalize(roundingMode rounding_mode,
744 lostFraction lost_fraction)
746 unsigned int omsb; /* One, not zero, based MSB. */
749 if(category != fcNormal)
752 /* Before rounding normalize the exponent of fcNormal numbers. */
753 omsb = significandMSB() + 1;
756 /* OMSB is numbered from 1. We want to place it in the integer
757 bit numbered PRECISON if possible, with a compensating change in
759 exponentChange = omsb - semantics->precision;
761 /* If the resulting exponent is too high, overflow according to
762 the rounding mode. */
763 if(exponent + exponentChange > semantics->maxExponent)
764 return handleOverflow(rounding_mode);
766 /* Subnormal numbers have exponent minExponent, and their MSB
767 is forced based on that. */
768 if(exponent + exponentChange < semantics->minExponent)
769 exponentChange = semantics->minExponent - exponent;
771 /* Shifting left is easy as we don't lose precision. */
772 if(exponentChange < 0) {
773 assert(lost_fraction == lfExactlyZero);
775 shiftSignificandLeft(-exponentChange);
780 if(exponentChange > 0) {
783 /* Shift right and capture any new lost fraction. */
784 lf = shiftSignificandRight(exponentChange);
786 lost_fraction = combineLostFractions(lf, lost_fraction);
788 /* Keep OMSB up-to-date. */
789 if(omsb > (unsigned) exponentChange)
790 omsb -= (unsigned) exponentChange;
796 /* Now round the number according to rounding_mode given the lost
799 /* As specified in IEEE 754, since we do not trap we do not report
800 underflow for exact results. */
801 if(lost_fraction == lfExactlyZero) {
802 /* Canonicalize zeroes. */
809 /* Increment the significand if we're rounding away from zero. */
810 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
812 exponent = semantics->minExponent;
814 incrementSignificand();
815 omsb = significandMSB() + 1;
817 /* Did the significand increment overflow? */
818 if(omsb == (unsigned) semantics->precision + 1) {
819 /* Renormalize by incrementing the exponent and shifting our
820 significand right one. However if we already have the
821 maximum exponent we overflow to infinity. */
822 if(exponent == semantics->maxExponent) {
823 category = fcInfinity;
825 return (opStatus) (opOverflow | opInexact);
828 shiftSignificandRight(1);
834 /* The normal case - we were and are not denormal, and any
835 significand increment above didn't overflow. */
836 if(omsb == semantics->precision)
839 /* We have a non-zero denormal. */
840 assert(omsb < semantics->precision);
841 assert(exponent == semantics->minExponent);
843 /* Canonicalize zeroes. */
847 /* The fcZero case is a denormal that underflowed to zero. */
848 return (opStatus) (opUnderflow | opInexact);
852 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
854 switch(convolve(category, rhs.category)) {
858 case convolve(fcNaN, fcZero):
859 case convolve(fcNaN, fcNormal):
860 case convolve(fcNaN, fcInfinity):
861 case convolve(fcNaN, fcNaN):
862 case convolve(fcNormal, fcZero):
863 case convolve(fcInfinity, fcNormal):
864 case convolve(fcInfinity, fcZero):
867 case convolve(fcZero, fcNaN):
868 case convolve(fcNormal, fcNaN):
869 case convolve(fcInfinity, fcNaN):
871 copySignificand(rhs);
874 case convolve(fcNormal, fcInfinity):
875 case convolve(fcZero, fcInfinity):
876 category = fcInfinity;
877 sign = rhs.sign ^ subtract;
880 case convolve(fcZero, fcNormal):
882 sign = rhs.sign ^ subtract;
885 case convolve(fcZero, fcZero):
886 /* Sign depends on rounding mode; handled by caller. */
889 case convolve(fcInfinity, fcInfinity):
890 /* Differently signed infinities can only be validly
892 if(sign ^ rhs.sign != subtract) {
894 // Arbitrary but deterministic value for significand
895 APInt::tcSet(significandParts(), ~0U, partCount());
901 case convolve(fcNormal, fcNormal):
906 /* Add or subtract two normal numbers. */
908 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
911 lostFraction lost_fraction;
914 /* Determine if the operation on the absolute values is effectively
915 an addition or subtraction. */
916 subtract ^= (sign ^ rhs.sign);
918 /* Are we bigger exponent-wise than the RHS? */
919 bits = exponent - rhs.exponent;
921 /* Subtraction is more subtle than one might naively expect. */
923 APFloat temp_rhs(rhs);
927 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
928 lost_fraction = lfExactlyZero;
929 } else if (bits > 0) {
930 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
931 shiftSignificandLeft(1);
934 lost_fraction = shiftSignificandRight(-bits - 1);
935 temp_rhs.shiftSignificandLeft(1);
940 carry = temp_rhs.subtractSignificand
941 (*this, lost_fraction != lfExactlyZero);
942 copySignificand(temp_rhs);
945 carry = subtractSignificand
946 (temp_rhs, lost_fraction != lfExactlyZero);
949 /* Invert the lost fraction - it was on the RHS and
951 if(lost_fraction == lfLessThanHalf)
952 lost_fraction = lfMoreThanHalf;
953 else if(lost_fraction == lfMoreThanHalf)
954 lost_fraction = lfLessThanHalf;
956 /* The code above is intended to ensure that no borrow is
961 APFloat temp_rhs(rhs);
963 lost_fraction = temp_rhs.shiftSignificandRight(bits);
964 carry = addSignificand(temp_rhs);
966 lost_fraction = shiftSignificandRight(-bits);
967 carry = addSignificand(rhs);
970 /* We have a guard bit; generating a carry cannot happen. */
974 return lost_fraction;
978 APFloat::multiplySpecials(const APFloat &rhs)
980 switch(convolve(category, rhs.category)) {
984 case convolve(fcNaN, fcZero):
985 case convolve(fcNaN, fcNormal):
986 case convolve(fcNaN, fcInfinity):
987 case convolve(fcNaN, fcNaN):
990 case convolve(fcZero, fcNaN):
991 case convolve(fcNormal, fcNaN):
992 case convolve(fcInfinity, fcNaN):
994 copySignificand(rhs);
997 case convolve(fcNormal, fcInfinity):
998 case convolve(fcInfinity, fcNormal):
999 case convolve(fcInfinity, fcInfinity):
1000 category = fcInfinity;
1003 case convolve(fcZero, fcNormal):
1004 case convolve(fcNormal, fcZero):
1005 case convolve(fcZero, fcZero):
1009 case convolve(fcZero, fcInfinity):
1010 case convolve(fcInfinity, fcZero):
1012 // Arbitrary but deterministic value for significand
1013 APInt::tcSet(significandParts(), ~0U, partCount());
1016 case convolve(fcNormal, fcNormal):
1022 APFloat::divideSpecials(const APFloat &rhs)
1024 switch(convolve(category, rhs.category)) {
1028 case convolve(fcNaN, fcZero):
1029 case convolve(fcNaN, fcNormal):
1030 case convolve(fcNaN, fcInfinity):
1031 case convolve(fcNaN, fcNaN):
1032 case convolve(fcInfinity, fcZero):
1033 case convolve(fcInfinity, fcNormal):
1034 case convolve(fcZero, fcInfinity):
1035 case convolve(fcZero, fcNormal):
1038 case convolve(fcZero, fcNaN):
1039 case convolve(fcNormal, fcNaN):
1040 case convolve(fcInfinity, fcNaN):
1042 copySignificand(rhs);
1045 case convolve(fcNormal, fcInfinity):
1049 case convolve(fcNormal, fcZero):
1050 category = fcInfinity;
1053 case convolve(fcInfinity, fcInfinity):
1054 case convolve(fcZero, fcZero):
1056 // Arbitrary but deterministic value for significand
1057 APInt::tcSet(significandParts(), ~0U, partCount());
1060 case convolve(fcNormal, fcNormal):
1067 APFloat::changeSign()
1069 /* Look mummy, this one's easy. */
1074 APFloat::clearSign()
1076 /* So is this one. */
1081 APFloat::copySign(const APFloat &rhs)
1087 /* Normalized addition or subtraction. */
1089 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1094 fs = addOrSubtractSpecials(rhs, subtract);
1096 /* This return code means it was not a simple case. */
1097 if(fs == opDivByZero) {
1098 lostFraction lost_fraction;
1100 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1101 fs = normalize(rounding_mode, lost_fraction);
1103 /* Can only be zero if we lost no fraction. */
1104 assert(category != fcZero || lost_fraction == lfExactlyZero);
1107 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1108 positive zero unless rounding to minus infinity, except that
1109 adding two like-signed zeroes gives that zero. */
1110 if(category == fcZero) {
1111 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1112 sign = (rounding_mode == rmTowardNegative);
1118 /* Normalized addition. */
1120 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1122 return addOrSubtract(rhs, rounding_mode, false);
1125 /* Normalized subtraction. */
1127 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1129 return addOrSubtract(rhs, rounding_mode, true);
1132 /* Normalized multiply. */
1134 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1139 fs = multiplySpecials(rhs);
1141 if(category == fcNormal) {
1142 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1143 fs = normalize(rounding_mode, lost_fraction);
1144 if(lost_fraction != lfExactlyZero)
1145 fs = (opStatus) (fs | opInexact);
1151 /* Normalized divide. */
1153 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1158 fs = divideSpecials(rhs);
1160 if(category == fcNormal) {
1161 lostFraction lost_fraction = divideSignificand(rhs);
1162 fs = normalize(rounding_mode, lost_fraction);
1163 if(lost_fraction != lfExactlyZero)
1164 fs = (opStatus) (fs | opInexact);
1170 /* Normalized remainder. */
1172 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1176 unsigned int origSign = sign;
1177 fs = V.divide(rhs, rmNearestTiesToEven);
1178 if (fs == opDivByZero)
1181 int parts = partCount();
1182 integerPart *x = new integerPart[parts];
1183 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1184 rmNearestTiesToEven);
1185 if (fs==opInvalidOp)
1188 fs = V.convertFromInteger(x, parts * integerPartWidth, true,
1189 rmNearestTiesToEven);
1190 assert(fs==opOK); // should always work
1192 fs = V.multiply(rhs, rounding_mode);
1193 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1195 fs = subtract(V, rounding_mode);
1196 assert(fs==opOK || fs==opInexact); // likewise
1199 sign = origSign; // IEEE754 requires this
1204 /* Normalized fused-multiply-add. */
1206 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1207 const APFloat &addend,
1208 roundingMode rounding_mode)
1212 /* Post-multiplication sign, before addition. */
1213 sign ^= multiplicand.sign;
1215 /* If and only if all arguments are normal do we need to do an
1216 extended-precision calculation. */
1217 if(category == fcNormal
1218 && multiplicand.category == fcNormal
1219 && addend.category == fcNormal) {
1220 lostFraction lost_fraction;
1222 lost_fraction = multiplySignificand(multiplicand, &addend);
1223 fs = normalize(rounding_mode, lost_fraction);
1224 if(lost_fraction != lfExactlyZero)
1225 fs = (opStatus) (fs | opInexact);
1227 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1228 positive zero unless rounding to minus infinity, except that
1229 adding two like-signed zeroes gives that zero. */
1230 if(category == fcZero && sign != addend.sign)
1231 sign = (rounding_mode == rmTowardNegative);
1233 fs = multiplySpecials(multiplicand);
1235 /* FS can only be opOK or opInvalidOp. There is no more work
1236 to do in the latter case. The IEEE-754R standard says it is
1237 implementation-defined in this case whether, if ADDEND is a
1238 quiet NaN, we raise invalid op; this implementation does so.
1240 If we need to do the addition we can do so with normal
1243 fs = addOrSubtract(addend, rounding_mode, false);
1249 /* Comparison requires normalized numbers. */
1251 APFloat::compare(const APFloat &rhs) const
1255 assert(semantics == rhs.semantics);
1257 switch(convolve(category, rhs.category)) {
1261 case convolve(fcNaN, fcZero):
1262 case convolve(fcNaN, fcNormal):
1263 case convolve(fcNaN, fcInfinity):
1264 case convolve(fcNaN, fcNaN):
1265 case convolve(fcZero, fcNaN):
1266 case convolve(fcNormal, fcNaN):
1267 case convolve(fcInfinity, fcNaN):
1268 return cmpUnordered;
1270 case convolve(fcInfinity, fcNormal):
1271 case convolve(fcInfinity, fcZero):
1272 case convolve(fcNormal, fcZero):
1276 return cmpGreaterThan;
1278 case convolve(fcNormal, fcInfinity):
1279 case convolve(fcZero, fcInfinity):
1280 case convolve(fcZero, fcNormal):
1282 return cmpGreaterThan;
1286 case convolve(fcInfinity, fcInfinity):
1287 if(sign == rhs.sign)
1292 return cmpGreaterThan;
1294 case convolve(fcZero, fcZero):
1297 case convolve(fcNormal, fcNormal):
1301 /* Two normal numbers. Do they have the same sign? */
1302 if(sign != rhs.sign) {
1304 result = cmpLessThan;
1306 result = cmpGreaterThan;
1308 /* Compare absolute values; invert result if negative. */
1309 result = compareAbsoluteValue(rhs);
1312 if(result == cmpLessThan)
1313 result = cmpGreaterThan;
1314 else if(result == cmpGreaterThan)
1315 result = cmpLessThan;
1323 APFloat::convert(const fltSemantics &toSemantics,
1324 roundingMode rounding_mode)
1326 lostFraction lostFraction;
1327 unsigned int newPartCount, oldPartCount;
1330 lostFraction = lfExactlyZero;
1331 newPartCount = partCountForBits(toSemantics.precision + 1);
1332 oldPartCount = partCount();
1334 /* Handle storage complications. If our new form is wider,
1335 re-allocate our bit pattern into wider storage. If it is
1336 narrower, we ignore the excess parts, but if narrowing to a
1337 single part we need to free the old storage.
1338 Be careful not to reference significandParts for zeroes
1339 and infinities, since it aborts. */
1340 if (newPartCount > oldPartCount) {
1341 integerPart *newParts;
1342 newParts = new integerPart[newPartCount];
1343 APInt::tcSet(newParts, 0, newPartCount);
1344 if (category==fcNormal || category==fcNaN)
1345 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1347 significand.parts = newParts;
1348 } else if (newPartCount < oldPartCount) {
1349 /* Capture any lost fraction through truncation of parts so we get
1350 correct rounding whilst normalizing. */
1351 if (category==fcNormal)
1352 lostFraction = lostFractionThroughTruncation
1353 (significandParts(), oldPartCount, toSemantics.precision);
1354 if (newPartCount == 1) {
1355 integerPart newPart = 0;
1356 if (category==fcNormal || category==fcNaN)
1357 newPart = significandParts()[0];
1359 significand.part = newPart;
1363 if(category == fcNormal) {
1364 /* Re-interpret our bit-pattern. */
1365 exponent += toSemantics.precision - semantics->precision;
1366 semantics = &toSemantics;
1367 fs = normalize(rounding_mode, lostFraction);
1368 } else if (category == fcNaN) {
1369 int shift = toSemantics.precision - semantics->precision;
1370 // No normalization here, just truncate
1372 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1374 APInt::tcShiftRight(significandParts(), newPartCount, -shift);
1375 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1376 // does not give you back the same bits. This is dubious, and we
1377 // don't currently do it. You're really supposed to get
1378 // an invalid operation signal at runtime, but nobody does that.
1379 semantics = &toSemantics;
1382 semantics = &toSemantics;
1389 /* Convert a floating point number to an integer according to the
1390 rounding mode. If the rounded integer value is out of range this
1391 returns an invalid operation exception. If the rounded value is in
1392 range but the floating point number is not the exact integer, the C
1393 standard doesn't require an inexact exception to be raised. IEEE
1394 854 does require it so we do that.
1396 Note that for conversions to integer type the C standard requires
1397 round-to-zero to always be used. */
1399 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1401 roundingMode rounding_mode) const
1403 lostFraction lost_fraction;
1404 unsigned int msb, partsCount;
1407 partsCount = partCountForBits(width);
1409 /* Handle the three special cases first. We produce
1410 a deterministic result even for the Invalid cases. */
1411 if (category == fcNaN) {
1412 // Neither sign nor isSigned affects this.
1413 APInt::tcSet(parts, 0, partsCount);
1416 if (category == fcInfinity) {
1417 if (!sign && isSigned)
1418 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1419 else if (!sign && !isSigned)
1420 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1421 else if (sign && isSigned) {
1422 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1423 APInt::tcShiftLeft(parts, partsCount, width-1);
1424 } else // sign && !isSigned
1425 APInt::tcSet(parts, 0, partsCount);
1428 if (category == fcZero) {
1429 APInt::tcSet(parts, 0, partsCount);
1433 /* Shift the bit pattern so the fraction is lost. */
1436 bits = (int) semantics->precision - 1 - exponent;
1439 lost_fraction = tmp.shiftSignificandRight(bits);
1441 if (-bits >= semantics->precision) {
1442 // Unrepresentably large.
1443 if (!sign && isSigned)
1444 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1445 else if (!sign && !isSigned)
1446 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1447 else if (sign && isSigned) {
1448 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1449 APInt::tcShiftLeft(parts, partsCount, width-1);
1450 } else // sign && !isSigned
1451 APInt::tcSet(parts, 0, partsCount);
1452 return (opStatus)(opOverflow | opInexact);
1454 tmp.shiftSignificandLeft(-bits);
1455 lost_fraction = lfExactlyZero;
1458 if(lost_fraction != lfExactlyZero
1459 && tmp.roundAwayFromZero(rounding_mode, lost_fraction, 0))
1460 tmp.incrementSignificand();
1462 msb = tmp.significandMSB();
1464 /* Negative numbers cannot be represented as unsigned. */
1465 if(!isSigned && tmp.sign && msb != -1U)
1468 /* It takes exponent + 1 bits to represent the truncated floating
1469 point number without its sign. We lose a bit for the sign, but
1470 the maximally negative integer is a special case. */
1471 if(msb + 1 > width) /* !! Not same as msb >= width !! */
1474 if(isSigned && msb + 1 == width
1475 && (!tmp.sign || tmp.significandLSB() != msb))
1478 APInt::tcAssign(parts, tmp.significandParts(), partsCount);
1481 APInt::tcNegate(parts, partsCount);
1483 if(lost_fraction == lfExactlyZero)
1490 APFloat::convertFromUnsignedInteger(integerPart *parts,
1491 unsigned int partCount,
1492 roundingMode rounding_mode)
1494 unsigned int msb, precision;
1495 lostFraction lost_fraction;
1497 msb = APInt::tcMSB(parts, partCount) + 1;
1498 precision = semantics->precision;
1500 category = fcNormal;
1501 exponent = precision - 1;
1503 if(msb > precision) {
1504 exponent += (msb - precision);
1505 lost_fraction = shiftRight(parts, partCount, msb - precision);
1508 lost_fraction = lfExactlyZero;
1510 /* Copy the bit image. */
1512 APInt::tcAssign(significandParts(), parts, partCountForBits(msb));
1514 return normalize(rounding_mode, lost_fraction);
1518 APFloat::convertFromInteger(const integerPart *parts, unsigned int width,
1519 bool isSigned, roundingMode rounding_mode)
1521 unsigned int partCount = partCountForBits(width);
1523 APInt api = APInt(width, partCount, parts);
1524 integerPart *copy = new integerPart[partCount];
1527 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1532 APInt::tcAssign(copy, api.getRawData(), partCount);
1533 status = convertFromUnsignedInteger(copy, partCount, rounding_mode);
1538 APFloat::convertFromHexadecimalString(const char *p,
1539 roundingMode rounding_mode)
1541 lostFraction lost_fraction;
1542 integerPart *significand;
1543 unsigned int bitPos, partsCount;
1544 const char *dot, *firstSignificantDigit;
1548 category = fcNormal;
1550 significand = significandParts();
1551 partsCount = partCount();
1552 bitPos = partsCount * integerPartWidth;
1554 /* Skip leading zeroes and any(hexa)decimal point. */
1555 p = skipLeadingZeroesAndAnyDot(p, &dot);
1556 firstSignificantDigit = p;
1559 integerPart hex_value;
1566 hex_value = hexDigitValue(*p);
1567 if(hex_value == -1U) {
1568 lost_fraction = lfExactlyZero;
1574 /* Store the number whilst 4-bit nibbles remain. */
1577 hex_value <<= bitPos % integerPartWidth;
1578 significand[bitPos / integerPartWidth] |= hex_value;
1580 lost_fraction = trailingHexadecimalFraction(p, hex_value);
1581 while(hexDigitValue(*p) != -1U)
1587 /* Hex floats require an exponent but not a hexadecimal point. */
1588 assert(*p == 'p' || *p == 'P');
1590 /* Ignore the exponent if we are zero. */
1591 if(p != firstSignificantDigit) {
1594 /* Implicit hexadecimal point? */
1598 /* Calculate the exponent adjustment implicit in the number of
1599 significant digits. */
1600 expAdjustment = dot - firstSignificantDigit;
1601 if(expAdjustment < 0)
1603 expAdjustment = expAdjustment * 4 - 1;
1605 /* Adjust for writing the significand starting at the most
1606 significant nibble. */
1607 expAdjustment += semantics->precision;
1608 expAdjustment -= partsCount * integerPartWidth;
1610 /* Adjust for the given exponent. */
1611 exponent = totalExponent(p, expAdjustment);
1614 return normalize(rounding_mode, lost_fraction);
1618 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
1620 /* Handle a leading minus sign. */
1626 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
1627 return convertFromHexadecimalString(p + 2, rounding_mode);
1629 assert(0 && "Decimal to binary conversions not yet implemented");
1633 // For good performance it is desirable for different APFloats
1634 // to produce different integers.
1636 APFloat::getHashValue() const
1638 if (category==fcZero) return sign<<8 | semantics->precision ;
1639 else if (category==fcInfinity) return sign<<9 | semantics->precision;
1640 else if (category==fcNaN) return 1<<10 | semantics->precision;
1642 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
1643 const integerPart* p = significandParts();
1644 for (int i=partCount(); i>0; i--, p++)
1645 hash ^= ((uint32_t)*p) ^ (*p)>>32;
1650 // Conversion from APFloat to/from host float/double. It may eventually be
1651 // possible to eliminate these and have everybody deal with APFloats, but that
1652 // will take a while. This approach will not easily extend to long double.
1653 // Current implementation requires integerPartWidth==64, which is correct at
1654 // the moment but could be made more general.
1656 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
1657 // the actual IEEE respresentations. We compensate for that here.
1660 APFloat::convertF80LongDoubleAPFloatToAPInt() const
1662 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended);
1663 assert (partCount()==2);
1665 uint64_t myexponent, mysignificand;
1667 if (category==fcNormal) {
1668 myexponent = exponent+16383; //bias
1669 mysignificand = significandParts()[0];
1670 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
1671 myexponent = 0; // denormal
1672 } else if (category==fcZero) {
1675 } else if (category==fcInfinity) {
1676 myexponent = 0x7fff;
1677 mysignificand = 0x8000000000000000ULL;
1678 } else if (category==fcNaN) {
1679 myexponent = 0x7fff;
1680 mysignificand = significandParts()[0];
1685 words[0] = (((uint64_t)sign & 1) << 63) |
1686 ((myexponent & 0x7fff) << 48) |
1687 ((mysignificand >>16) & 0xffffffffffffLL);
1688 words[1] = mysignificand & 0xffff;
1689 APInt api(80, 2, words);
1694 APFloat::convertDoubleAPFloatToAPInt() const
1696 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
1697 assert (partCount()==1);
1699 uint64_t myexponent, mysignificand;
1701 if (category==fcNormal) {
1702 myexponent = exponent+1023; //bias
1703 mysignificand = *significandParts();
1704 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
1705 myexponent = 0; // denormal
1706 } else if (category==fcZero) {
1709 } else if (category==fcInfinity) {
1712 } else if (category==fcNaN) {
1714 mysignificand = *significandParts();
1718 APInt api(64, (((((uint64_t)sign & 1) << 63) |
1719 ((myexponent & 0x7ff) << 52) |
1720 (mysignificand & 0xfffffffffffffLL))));
1725 APFloat::convertFloatAPFloatToAPInt() const
1727 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
1728 assert (partCount()==1);
1730 uint32_t myexponent, mysignificand;
1732 if (category==fcNormal) {
1733 myexponent = exponent+127; //bias
1734 mysignificand = *significandParts();
1735 if (myexponent == 1 && !(mysignificand & 0x400000))
1736 myexponent = 0; // denormal
1737 } else if (category==fcZero) {
1740 } else if (category==fcInfinity) {
1743 } else if (category==fcNaN) {
1745 mysignificand = *significandParts();
1749 APInt api(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
1750 (mysignificand & 0x7fffff)));
1755 APFloat::convertToAPInt() const
1757 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle)
1758 return convertFloatAPFloatToAPInt();
1759 else if (semantics == (const llvm::fltSemantics* const)&IEEEdouble)
1760 return convertDoubleAPFloatToAPInt();
1761 else if (semantics == (const llvm::fltSemantics* const)&x87DoubleExtended)
1762 return convertF80LongDoubleAPFloatToAPInt();
1769 APFloat::convertToFloat() const
1771 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
1772 APInt api = convertToAPInt();
1773 return api.bitsToFloat();
1777 APFloat::convertToDouble() const
1779 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
1780 APInt api = convertToAPInt();
1781 return api.bitsToDouble();
1784 /// Integer bit is explicit in this format. Current Intel book does not
1785 /// define meaning of:
1786 /// exponent = all 1's, integer bit not set.
1787 /// exponent = 0, integer bit set. (formerly "psuedodenormals")
1788 /// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals")
1790 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
1792 assert(api.getBitWidth()==80);
1793 uint64_t i1 = api.getRawData()[0];
1794 uint64_t i2 = api.getRawData()[1];
1795 uint64_t myexponent = (i1 >> 48) & 0x7fff;
1796 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
1799 initialize(&APFloat::x87DoubleExtended);
1800 assert(partCount()==2);
1803 if (myexponent==0 && mysignificand==0) {
1804 // exponent, significand meaningless
1806 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
1807 // exponent, significand meaningless
1808 category = fcInfinity;
1809 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
1810 // exponent meaningless
1812 significandParts()[0] = mysignificand;
1813 significandParts()[1] = 0;
1815 category = fcNormal;
1816 exponent = myexponent - 16383;
1817 significandParts()[0] = mysignificand;
1818 significandParts()[1] = 0;
1819 if (myexponent==0) // denormal
1825 APFloat::initFromDoubleAPInt(const APInt &api)
1827 assert(api.getBitWidth()==64);
1828 uint64_t i = *api.getRawData();
1829 uint64_t myexponent = (i >> 52) & 0x7ff;
1830 uint64_t mysignificand = i & 0xfffffffffffffLL;
1832 initialize(&APFloat::IEEEdouble);
1833 assert(partCount()==1);
1836 if (myexponent==0 && mysignificand==0) {
1837 // exponent, significand meaningless
1839 } else if (myexponent==0x7ff && mysignificand==0) {
1840 // exponent, significand meaningless
1841 category = fcInfinity;
1842 } else if (myexponent==0x7ff && mysignificand!=0) {
1843 // exponent meaningless
1845 *significandParts() = mysignificand;
1847 category = fcNormal;
1848 exponent = myexponent - 1023;
1849 *significandParts() = mysignificand;
1850 if (myexponent==0) // denormal
1853 *significandParts() |= 0x10000000000000LL; // integer bit
1858 APFloat::initFromFloatAPInt(const APInt & api)
1860 assert(api.getBitWidth()==32);
1861 uint32_t i = (uint32_t)*api.getRawData();
1862 uint32_t myexponent = (i >> 23) & 0xff;
1863 uint32_t mysignificand = i & 0x7fffff;
1865 initialize(&APFloat::IEEEsingle);
1866 assert(partCount()==1);
1869 if (myexponent==0 && mysignificand==0) {
1870 // exponent, significand meaningless
1872 } else if (myexponent==0xff && mysignificand==0) {
1873 // exponent, significand meaningless
1874 category = fcInfinity;
1875 } else if (myexponent==0xff && mysignificand!=0) {
1876 // sign, exponent, significand meaningless
1878 *significandParts() = mysignificand;
1880 category = fcNormal;
1881 exponent = myexponent - 127; //bias
1882 *significandParts() = mysignificand;
1883 if (myexponent==0) // denormal
1886 *significandParts() |= 0x800000; // integer bit
1890 /// Treat api as containing the bits of a floating point number. Currently
1891 /// we infer the floating point type from the size of the APInt. FIXME: This
1892 /// breaks when we get to PPC128 and IEEE128 (but both cannot exist in the
1893 /// same compile...)
1895 APFloat::initFromAPInt(const APInt& api)
1897 if (api.getBitWidth() == 32)
1898 return initFromFloatAPInt(api);
1899 else if (api.getBitWidth()==64)
1900 return initFromDoubleAPInt(api);
1901 else if (api.getBitWidth()==80)
1902 return initFromF80LongDoubleAPInt(api);
1907 APFloat::APFloat(const APInt& api)
1912 APFloat::APFloat(float f)
1914 APInt api = APInt(32, 0);
1915 initFromAPInt(api.floatToBits(f));
1918 APFloat::APFloat(double d)
1920 APInt api = APInt(64, 0);
1921 initFromAPInt(api.doubleToBits(d));