lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "apint"
  16 #include "llvm/ADT/APInt.h"
  17 #include "llvm/ADT/StringRef.h"
  18 #include "llvm/ADT/FoldingSet.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/Support/Debug.h"
  21 #include "llvm/Support/ErrorHandling.h"
  22 #include "llvm/Support/MathExtras.h"
  23 #include "llvm/Support/raw_ostream.h"
  24 #include <cmath>
  25 #include <limits>
  26 #include <cstring>
  27 #include <cstdlib>
  28 using namespace llvm;
  29
  30 /// A utility function for allocating memory, checking for allocation failures,
  31 /// and ensuring the contents are zeroed.
  32 inline static uint64_t* getClearedMemory(unsigned numWords) {
  33   uint64_t * result = new uint64_t[numWords];
  34   assert(result && "APInt memory allocation fails!");
  35   memset(result, 0, numWords * sizeof(uint64_t));
  36   return result;
  37 }
  38
  39 /// A utility function for allocating memory and checking for allocation
  40 /// failure.  The content is not zeroed.
  41 inline static uint64_t* getMemory(unsigned numWords) {
  42   uint64_t * result = new uint64_t[numWords];
  43   assert(result && "APInt memory allocation fails!");
  44   return result;
  45 }
  46
  47 /// A utility function that converts a character to a digit.
  48 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  49   unsigned r;
  50
  51   if (radix == 16) {
  52     r = cdigit - '0';
  53     if (r <= 9)
  54       return r;
  55
  56     r = cdigit - 'A';
  57     if (r <= 5)
  58       return r + 10;
  59
  60     r = cdigit - 'a';
  61     if (r <= 5)
  62       return r + 10;
  63   }
  64
  65   r = cdigit - '0';
  66   if (r < radix)
  67     return r;
  68
  69   return -1U;
  70 }
  71
  72
  73 void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
  74   pVal = getClearedMemory(getNumWords());
  75   pVal[0] = val;
  76   if (isSigned && int64_t(val) < 0)
  77     for (unsigned i = 1; i < getNumWords(); ++i)
  78       pVal[i] = -1ULL;
  79 }
  80
  81 void APInt::initSlowCase(const APInt& that) {
  82   pVal = getMemory(getNumWords());
  83   memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
  84 }
  85
  86
  87 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
  88   : BitWidth(numBits), VAL(0) {
  89   assert(BitWidth && "Bitwidth too small");
  90   assert(bigVal && "Null pointer detected!");
  91   if (isSingleWord())
  92     VAL = bigVal[0];
  93   else {
  94     // Get memory, cleared to 0
  95     pVal = getClearedMemory(getNumWords());
  96     // Calculate the number of words to copy
  97     unsigned words = std::min<unsigned>(numWords, getNumWords());
  98     // Copy the words from bigVal to pVal
  99     memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
 100   }
 101   // Make sure unused high bits are cleared
 102   clearUnusedBits();
 103 }
 104
 105 APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix)
 106   : BitWidth(numbits), VAL(0) {
 107   assert(BitWidth && "Bitwidth too small");
 108   fromString(numbits, Str, radix);
 109 }
 110
 111 APInt& APInt::AssignSlowCase(const APInt& RHS) {
 112   // Don't do anything for X = X
 113   if (this == &RHS)
 114     return *this;
 115
 116   if (BitWidth == RHS.getBitWidth()) {
 117     // assume same bit-width single-word case is already handled
 118     assert(!isSingleWord());
 119     memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
 120     return *this;
 121   }
 122
 123   if (isSingleWord()) {
 124     // assume case where both are single words is already handled
 125     assert(!RHS.isSingleWord());
 126     VAL = 0;
 127     pVal = getMemory(RHS.getNumWords());
 128     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 129   } else if (getNumWords() == RHS.getNumWords())
 130     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 131   else if (RHS.isSingleWord()) {
 132     delete [] pVal;
 133     VAL = RHS.VAL;
 134   } else {
 135     delete [] pVal;
 136     pVal = getMemory(RHS.getNumWords());
 137     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 138   }
 139   BitWidth = RHS.BitWidth;
 140   return clearUnusedBits();
 141 }
 142
 143 APInt& APInt::operator=(uint64_t RHS) {
 144   if (isSingleWord())
 145     VAL = RHS;
 146   else {
 147     pVal[0] = RHS;
 148     memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
 149   }
 150   return clearUnusedBits();
 151 }
 152
 153 /// Profile - This method 'profiles' an APInt for use with FoldingSet.
 154 void APInt::Profile(FoldingSetNodeID& ID) const {
 155   ID.AddInteger(BitWidth);
 156
 157   if (isSingleWord()) {
 158     ID.AddInteger(VAL);
 159     return;
 160   }
 161
 162   unsigned NumWords = getNumWords();
 163   for (unsigned i = 0; i < NumWords; ++i)
 164     ID.AddInteger(pVal[i]);
 165 }
 166
 167 /// add_1 - This function adds a single "digit" integer, y, to the multiple
 168 /// "digit" integer array,  x[]. x[] is modified to reflect the addition and
 169 /// 1 is returned if there is a carry out, otherwise 0 is returned.
 170 /// @returns the carry of the addition.
 171 static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 172   for (unsigned i = 0; i < len; ++i) {
 173     dest[i] = y + x[i];
 174     if (dest[i] < y)
 175       y = 1; // Carry one to next digit.
 176     else {
 177       y = 0; // No need to carry so exit early
 178       break;
 179     }
 180   }
 181   return y;
 182 }
 183
 184 /// @brief Prefix increment operator. Increments the APInt by one.
 185 APInt& APInt::operator++() {
 186   if (isSingleWord())
 187     ++VAL;
 188   else
 189     add_1(pVal, pVal, getNumWords(), 1);
 190   return clearUnusedBits();
 191 }
 192
 193 /// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
 194 /// the multi-digit integer array, x[], propagating the borrowed 1 value until
 195 /// no further borrowing is neeeded or it runs out of "digits" in x.  The result
 196 /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
 197 /// In other words, if y > x then this function returns 1, otherwise 0.
 198 /// @returns the borrow out of the subtraction
 199 static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
 200   for (unsigned i = 0; i < len; ++i) {
 201     uint64_t X = x[i];
 202     x[i] -= y;
 203     if (y > X)
 204       y = 1;  // We have to "borrow 1" from next "digit"
 205     else {
 206       y = 0;  // No need to borrow
 207       break;  // Remaining digits are unchanged so exit early
 208     }
 209   }
 210   return bool(y);
 211 }
 212
 213 /// @brief Prefix decrement operator. Decrements the APInt by one.
 214 APInt& APInt::operator--() {
 215   if (isSingleWord())
 216     --VAL;
 217   else
 218     sub_1(pVal, getNumWords(), 1);
 219   return clearUnusedBits();
 220 }
 221
 222 /// add - This function adds the integer array x to the integer array Y and
 223 /// places the result in dest.
 224 /// @returns the carry out from the addition
 225 /// @brief General addition of 64-bit integer arrays
 226 static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 227                 unsigned len) {
 228   bool carry = false;
 229   for (unsigned i = 0; i< len; ++i) {
 230     uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
 231     dest[i] = x[i] + y[i] + carry;
 232     carry = dest[i] < limit || (carry && dest[i] == limit);
 233   }
 234   return carry;
 235 }
 236
 237 /// Adds the RHS APint to this APInt.
 238 /// @returns this, after addition of RHS.
 239 /// @brief Addition assignment operator.
 240 APInt& APInt::operator+=(const APInt& RHS) {
 241   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 242   if (isSingleWord())
 243     VAL += RHS.VAL;
 244   else {
 245     add(pVal, pVal, RHS.pVal, getNumWords());
 246   }
 247   return clearUnusedBits();
 248 }
 249
 250 /// Subtracts the integer array y from the integer array x
 251 /// @returns returns the borrow out.
 252 /// @brief Generalized subtraction of 64-bit integer arrays.
 253 static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 254                 unsigned len) {
 255   bool borrow = false;
 256   for (unsigned i = 0; i < len; ++i) {
 257     uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
 258     borrow = y[i] > x_tmp || (borrow && x[i] == 0);
 259     dest[i] = x_tmp - y[i];
 260   }
 261   return borrow;
 262 }
 263
 264 /// Subtracts the RHS APInt from this APInt
 265 /// @returns this, after subtraction
 266 /// @brief Subtraction assignment operator.
 267 APInt& APInt::operator-=(const APInt& RHS) {
 268   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 269   if (isSingleWord())
 270     VAL -= RHS.VAL;
 271   else
 272     sub(pVal, pVal, RHS.pVal, getNumWords());
 273   return clearUnusedBits();
 274 }
 275
 276 /// Multiplies an integer array, x, by a uint64_t integer and places the result
 277 /// into dest.
 278 /// @returns the carry out of the multiplication.
 279 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
 280 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 281   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
 282   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
 283   uint64_t carry = 0;
 284
 285   // For each digit of x.
 286   for (unsigned i = 0; i < len; ++i) {
 287     // Split x into high and low words
 288     uint64_t lx = x[i] & 0xffffffffULL;
 289     uint64_t hx = x[i] >> 32;
 290     // hasCarry - A flag to indicate if there is a carry to the next digit.
 291     // hasCarry == 0, no carry
 292     // hasCarry == 1, has carry
 293     // hasCarry == 2, no carry and the calculation result == 0.
 294     uint8_t hasCarry = 0;
 295     dest[i] = carry + lx * ly;
 296     // Determine if the add above introduces carry.
 297     hasCarry = (dest[i] < carry) ? 1 : 0;
 298     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
 299     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
 300     // (2^32 - 1) + 2^32 = 2^64.
 301     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 302
 303     carry += (lx * hy) & 0xffffffffULL;
 304     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
 305     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
 306             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
 307   }
 308   return carry;
 309 }
 310
 311 /// Multiplies integer array x by integer array y and stores the result into
 312 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
 313 /// @brief Generalized multiplicate of integer arrays.
 314 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
 315                 unsigned ylen) {
 316   dest[xlen] = mul_1(dest, x, xlen, y[0]);
 317   for (unsigned i = 1; i < ylen; ++i) {
 318     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
 319     uint64_t carry = 0, lx = 0, hx = 0;
 320     for (unsigned j = 0; j < xlen; ++j) {
 321       lx = x[j] & 0xffffffffULL;
 322       hx = x[j] >> 32;
 323       // hasCarry - A flag to indicate if has carry.
 324       // hasCarry == 0, no carry
 325       // hasCarry == 1, has carry
 326       // hasCarry == 2, no carry and the calculation result == 0.
 327       uint8_t hasCarry = 0;
 328       uint64_t resul = carry + lx * ly;
 329       hasCarry = (resul < carry) ? 1 : 0;
 330       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
 331       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 332
 333       carry += (lx * hy) & 0xffffffffULL;
 334       resul = (carry << 32) | (resul & 0xffffffffULL);
 335       dest[i+j] += resul;
 336       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
 337               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
 338               ((lx * hy) >> 32) + hx * hy;
 339     }
 340     dest[i+xlen] = carry;
 341   }
 342 }
 343
 344 APInt& APInt::operator*=(const APInt& RHS) {
 345   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 346   if (isSingleWord()) {
 347     VAL *= RHS.VAL;
 348     clearUnusedBits();
 349     return *this;
 350   }
 351
 352   // Get some bit facts about LHS and check for zero
 353   unsigned lhsBits = getActiveBits();
 354   unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
 355   if (!lhsWords)
 356     // 0 * X ===> 0
 357     return *this;
 358
 359   // Get some bit facts about RHS and check for zero
 360   unsigned rhsBits = RHS.getActiveBits();
 361   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
 362   if (!rhsWords) {
 363     // X * 0 ===> 0
 364     clear();
 365     return *this;
 366   }
 367
 368   // Allocate space for the result
 369   unsigned destWords = rhsWords + lhsWords;
 370   uint64_t *dest = getMemory(destWords);
 371
 372   // Perform the long multiply
 373   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
 374
 375   // Copy result back into *this
 376   clear();
 377   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
 378   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
 379
 380   // delete dest array and return
 381   delete[] dest;
 382   return *this;
 383 }
 384
 385 APInt& APInt::operator&=(const APInt& RHS) {
 386   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 387   if (isSingleWord()) {
 388     VAL &= RHS.VAL;
 389     return *this;
 390   }
 391   unsigned numWords = getNumWords();
 392   for (unsigned i = 0; i < numWords; ++i)
 393     pVal[i] &= RHS.pVal[i];
 394   return *this;
 395 }
 396
 397 APInt& APInt::operator|=(const APInt& RHS) {
 398   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 399   if (isSingleWord()) {
 400     VAL |= RHS.VAL;
 401     return *this;
 402   }
 403   unsigned numWords = getNumWords();
 404   for (unsigned i = 0; i < numWords; ++i)
 405     pVal[i] |= RHS.pVal[i];
 406   return *this;
 407 }
 408
 409 APInt& APInt::operator^=(const APInt& RHS) {
 410   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 411   if (isSingleWord()) {
 412     VAL ^= RHS.VAL;
 413     this->clearUnusedBits();
 414     return *this;
 415   }
 416   unsigned numWords = getNumWords();
 417   for (unsigned i = 0; i < numWords; ++i)
 418     pVal[i] ^= RHS.pVal[i];
 419   return clearUnusedBits();
 420 }
 421
 422 APInt APInt::AndSlowCase(const APInt& RHS) const {
 423   unsigned numWords = getNumWords();
 424   uint64_t* val = getMemory(numWords);
 425   for (unsigned i = 0; i < numWords; ++i)
 426     val[i] = pVal[i] & RHS.pVal[i];
 427   return APInt(val, getBitWidth());
 428 }
 429
 430 APInt APInt::OrSlowCase(const APInt& RHS) const {
 431   unsigned numWords = getNumWords();
 432   uint64_t *val = getMemory(numWords);
 433   for (unsigned i = 0; i < numWords; ++i)
 434     val[i] = pVal[i] | RHS.pVal[i];
 435   return APInt(val, getBitWidth());
 436 }
 437
 438 APInt APInt::XorSlowCase(const APInt& RHS) const {
 439   unsigned numWords = getNumWords();
 440   uint64_t *val = getMemory(numWords);
 441   for (unsigned i = 0; i < numWords; ++i)
 442     val[i] = pVal[i] ^ RHS.pVal[i];
 443
 444   // 0^0==1 so clear the high bits in case they got set.
 445   return APInt(val, getBitWidth()).clearUnusedBits();
 446 }
 447
 448 bool APInt::operator !() const {
 449   if (isSingleWord())
 450     return !VAL;
 451
 452   for (unsigned i = 0; i < getNumWords(); ++i)
 453     if (pVal[i])
 454       return false;
 455   return true;
 456 }
 457
 458 APInt APInt::operator*(const APInt& RHS) const {
 459   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 460   if (isSingleWord())
 461     return APInt(BitWidth, VAL * RHS.VAL);
 462   APInt Result(*this);
 463   Result *= RHS;
 464   return Result.clearUnusedBits();
 465 }
 466
 467 APInt APInt::operator+(const APInt& RHS) const {
 468   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 469   if (isSingleWord())
 470     return APInt(BitWidth, VAL + RHS.VAL);
 471   APInt Result(BitWidth, 0);
 472   add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 473   return Result.clearUnusedBits();
 474 }
 475
 476 APInt APInt::operator-(const APInt& RHS) const {
 477   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 478   if (isSingleWord())
 479     return APInt(BitWidth, VAL - RHS.VAL);
 480   APInt Result(BitWidth, 0);
 481   sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 482   return Result.clearUnusedBits();
 483 }
 484
 485 bool APInt::operator[](unsigned bitPosition) const {
 486   return (maskBit(bitPosition) &
 487           (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
 488 }
 489
 490 bool APInt::EqualSlowCase(const APInt& RHS) const {
 491   // Get some facts about the number of bits used in the two operands.
 492   unsigned n1 = getActiveBits();
 493   unsigned n2 = RHS.getActiveBits();
 494
 495   // If the number of bits isn't the same, they aren't equal
 496   if (n1 != n2)
 497     return false;
 498
 499   // If the number of bits fits in a word, we only need to compare the low word.
 500   if (n1 <= APINT_BITS_PER_WORD)
 501     return pVal[0] == RHS.pVal[0];
 502
 503   // Otherwise, compare everything
 504   for (int i = whichWord(n1 - 1); i >= 0; --i)
 505     if (pVal[i] != RHS.pVal[i])
 506       return false;
 507   return true;
 508 }
 509
 510 bool APInt::EqualSlowCase(uint64_t Val) const {
 511   unsigned n = getActiveBits();
 512   if (n <= APINT_BITS_PER_WORD)
 513     return pVal[0] == Val;
 514   else
 515     return false;
 516 }
 517
 518 bool APInt::ult(const APInt& RHS) const {
 519   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 520   if (isSingleWord())
 521     return VAL < RHS.VAL;
 522
 523   // Get active bit length of both operands
 524   unsigned n1 = getActiveBits();
 525   unsigned n2 = RHS.getActiveBits();
 526
 527   // If magnitude of LHS is less than RHS, return true.
 528   if (n1 < n2)
 529     return true;
 530
 531   // If magnitude of RHS is greather than LHS, return false.
 532   if (n2 < n1)
 533     return false;
 534
 535   // If they bot fit in a word, just compare the low order word
 536   if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
 537     return pVal[0] < RHS.pVal[0];
 538
 539   // Otherwise, compare all words
 540   unsigned topWord = whichWord(std::max(n1,n2)-1);
 541   for (int i = topWord; i >= 0; --i) {
 542     if (pVal[i] > RHS.pVal[i])
 543       return false;
 544     if (pVal[i] < RHS.pVal[i])
 545       return true;
 546   }
 547   return false;
 548 }
 549
 550 bool APInt::slt(const APInt& RHS) const {
 551   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 552   if (isSingleWord()) {
 553     int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
 554     int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
 555     return lhsSext < rhsSext;
 556   }
 557
 558   APInt lhs(*this);
 559   APInt rhs(RHS);
 560   bool lhsNeg = isNegative();
 561   bool rhsNeg = rhs.isNegative();
 562   if (lhsNeg) {
 563     // Sign bit is set so perform two's complement to make it positive
 564     lhs.flip();
 565     lhs++;
 566   }
 567   if (rhsNeg) {
 568     // Sign bit is set so perform two's complement to make it positive
 569     rhs.flip();
 570     rhs++;
 571   }
 572
 573   // Now we have unsigned values to compare so do the comparison if necessary
 574   // based on the negativeness of the values.
 575   if (lhsNeg)
 576     if (rhsNeg)
 577       return lhs.ugt(rhs);
 578     else
 579       return true;
 580   else if (rhsNeg)
 581     return false;
 582   else
 583     return lhs.ult(rhs);
 584 }
 585
 586 APInt& APInt::set(unsigned bitPosition) {
 587   if (isSingleWord())
 588     VAL |= maskBit(bitPosition);
 589   else
 590     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
 591   return *this;
 592 }
 593
 594 /// Set the given bit to 0 whose position is given as "bitPosition".
 595 /// @brief Set a given bit to 0.
 596 APInt& APInt::clear(unsigned bitPosition) {
 597   if (isSingleWord())
 598     VAL &= ~maskBit(bitPosition);
 599   else
 600     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
 601   return *this;
 602 }
 603
 604 /// @brief Toggle every bit to its opposite value.
 605
 606 /// Toggle a given bit to its opposite value whose position is given
 607 /// as "bitPosition".
 608 /// @brief Toggles a given bit to its opposite value.
 609 APInt& APInt::flip(unsigned bitPosition) {
 610   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 611   if ((*this)[bitPosition]) clear(bitPosition);
 612   else set(bitPosition);
 613   return *this;
 614 }
 615
 616 unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) {
 617   assert(!str.empty() && "Invalid string length");
 618   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
 619          "Radix should be 2, 8, 10, or 16!");
 620
 621   size_t slen = str.size();
 622
 623   // Each computation below needs to know if it's negative.
 624   StringRef::iterator p = str.begin();
 625   unsigned isNegative = *p == '-';
 626   if (*p == '-' || *p == '+') {
 627     p++;
 628     slen--;
 629     assert(slen && "String is only a sign, needs a value.");
 630   }
 631
 632   // For radixes of power-of-two values, the bits required is accurately and
 633   // easily computed
 634   if (radix == 2)
 635     return slen + isNegative;
 636   if (radix == 8)
 637     return slen * 3 + isNegative;
 638   if (radix == 16)
 639     return slen * 4 + isNegative;
 640
 641   // This is grossly inefficient but accurate. We could probably do something
 642   // with a computation of roughly slen*64/20 and then adjust by the value of
 643   // the first few digits. But, I'm not sure how accurate that could be.
 644
 645   // Compute a sufficient number of bits that is always large enough but might
 646   // be too large. This avoids the assertion in the constructor. This
 647   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 648   // bits in that case.
 649   unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
 650
 651   // Convert to the actual binary value.
 652   APInt tmp(sufficient, StringRef(p, slen), radix);
 653
 654   // Compute how many bits are required. If the log is infinite, assume we need
 655   // just bit.
 656   unsigned log = tmp.logBase2();
 657   if (log == (unsigned)-1) {
 658     return isNegative + 1;
 659   } else {
 660     return isNegative + log + 1;
 661   }
 662 }
 663
 664 // From http://www.burtleburtle.net, byBob Jenkins.
 665 // When targeting x86, both GCC and LLVM seem to recognize this as a
 666 // rotate instruction.
 667 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
 668
 669 // From http://www.burtleburtle.net, by Bob Jenkins.
 670 #define mix(a,b,c) \
 671   { \
 672     a -= c;  a ^= rot(c, 4);  c += b; \
 673     b -= a;  b ^= rot(a, 6);  a += c; \
 674     c -= b;  c ^= rot(b, 8);  b += a; \
 675     a -= c;  a ^= rot(c,16);  c += b; \
 676     b -= a;  b ^= rot(a,19);  a += c; \
 677     c -= b;  c ^= rot(b, 4);  b += a; \
 678   }
 679
 680 // From http://www.burtleburtle.net, by Bob Jenkins.
 681 #define final(a,b,c) \
 682   { \
 683     c ^= b; c -= rot(b,14); \
 684     a ^= c; a -= rot(c,11); \
 685     b ^= a; b -= rot(a,25); \
 686     c ^= b; c -= rot(b,16); \
 687     a ^= c; a -= rot(c,4);  \
 688     b ^= a; b -= rot(a,14); \
 689     c ^= b; c -= rot(b,24); \
 690   }
 691
 692 // hashword() was adapted from http://www.burtleburtle.net, by Bob
 693 // Jenkins.  k is a pointer to an array of uint32_t values; length is
 694 // the length of the key, in 32-bit chunks.  This version only handles
 695 // keys that are a multiple of 32 bits in size.
 696 static inline uint32_t hashword(const uint64_t *k64, size_t length)
 697 {
 698   const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
 699   uint32_t a,b,c;
 700
 701   /* Set up the internal state */
 702   a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
 703
 704   /*------------------------------------------------- handle most of the key */
 705   while (length > 3)
 706     {
 707       a += k[0];
 708       b += k[1];
 709       c += k[2];
 710       mix(a,b,c);
 711       length -= 3;
 712       k += 3;
 713     }
 714
 715   /*------------------------------------------- handle the last 3 uint32_t's */
 716   switch (length) {                  /* all the case statements fall through */
 717   case 3 : c+=k[2];
 718   case 2 : b+=k[1];
 719   case 1 : a+=k[0];
 720     final(a,b,c);
 721     case 0:     /* case 0: nothing left to add */
 722       break;
 723     }
 724   /*------------------------------------------------------ report the result */
 725   return c;
 726 }
 727
 728 // hashword8() was adapted from http://www.burtleburtle.net, by Bob
 729 // Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
 730 // targeting x86 (32 or 64 bit), both LLVM and GCC compile this
 731 // function into about 35 instructions when inlined.
 732 static inline uint32_t hashword8(const uint64_t k64)
 733 {
 734   uint32_t a,b,c;
 735   a = b = c = 0xdeadbeef + 4;
 736   b += k64 >> 32;
 737   a += k64 & 0xffffffff;
 738   final(a,b,c);
 739   return c;
 740 }
 741 #undef final
 742 #undef mix
 743 #undef rot
 744
 745 uint64_t APInt::getHashValue() const {
 746   uint64_t hash;
 747   if (isSingleWord())
 748     hash = hashword8(VAL);
 749   else
 750     hash = hashword(pVal, getNumWords()*2);
 751   return hash;
 752 }
 753
 754 /// HiBits - This function returns the high "numBits" bits of this APInt.
 755 APInt APInt::getHiBits(unsigned numBits) const {
 756   return APIntOps::lshr(*this, BitWidth - numBits);
 757 }
 758
 759 /// LoBits - This function returns the low "numBits" bits of this APInt.
 760 APInt APInt::getLoBits(unsigned numBits) const {
 761   return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
 762                         BitWidth - numBits);
 763 }
 764
 765 bool APInt::isPowerOf2() const {
 766   return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
 767 }
 768
 769 unsigned APInt::countLeadingZerosSlowCase() const {
 770   // Treat the most significand word differently because it might have
 771   // meaningless bits set beyond the precision.
 772   unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
 773   integerPart MSWMask;
 774   if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
 775   else {
 776     MSWMask = ~integerPart(0);
 777     BitsInMSW = APINT_BITS_PER_WORD;
 778   }
 779
 780   unsigned i = getNumWords();
 781   integerPart MSW = pVal[i-1] & MSWMask;
 782   if (MSW)
 783     return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
 784
 785   unsigned Count = BitsInMSW;
 786   for (--i; i > 0u; --i) {
 787     if (pVal[i-1] == 0)
 788       Count += APINT_BITS_PER_WORD;
 789     else {
 790       Count += CountLeadingZeros_64(pVal[i-1]);
 791       break;
 792     }
 793   }
 794   return Count;
 795 }
 796
 797 static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
 798   unsigned Count = 0;
 799   if (skip)
 800     V <<= skip;
 801   while (V && (V & (1ULL << 63))) {
 802     Count++;
 803     V <<= 1;
 804   }
 805   return Count;
 806 }
 807
 808 unsigned APInt::countLeadingOnes() const {
 809   if (isSingleWord())
 810     return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
 811
 812   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 813   unsigned shift;
 814   if (!highWordBits) {
 815     highWordBits = APINT_BITS_PER_WORD;
 816     shift = 0;
 817   } else {
 818     shift = APINT_BITS_PER_WORD - highWordBits;
 819   }
 820   int i = getNumWords() - 1;
 821   unsigned Count = countLeadingOnes_64(pVal[i], shift);
 822   if (Count == highWordBits) {
 823     for (i--; i >= 0; --i) {
 824       if (pVal[i] == -1ULL)
 825         Count += APINT_BITS_PER_WORD;
 826       else {
 827         Count += countLeadingOnes_64(pVal[i], 0);
 828         break;
 829       }
 830     }
 831   }
 832   return Count;
 833 }
 834
 835 unsigned APInt::countTrailingZeros() const {
 836   if (isSingleWord())
 837     return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
 838   unsigned Count = 0;
 839   unsigned i = 0;
 840   for (; i < getNumWords() && pVal[i] == 0; ++i)
 841     Count += APINT_BITS_PER_WORD;
 842   if (i < getNumWords())
 843     Count += CountTrailingZeros_64(pVal[i]);
 844   return std::min(Count, BitWidth);
 845 }
 846
 847 unsigned APInt::countTrailingOnesSlowCase() const {
 848   unsigned Count = 0;
 849   unsigned i = 0;
 850   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
 851     Count += APINT_BITS_PER_WORD;
 852   if (i < getNumWords())
 853     Count += CountTrailingOnes_64(pVal[i]);
 854   return std::min(Count, BitWidth);
 855 }
 856
 857 unsigned APInt::countPopulationSlowCase() const {
 858   unsigned Count = 0;
 859   for (unsigned i = 0; i < getNumWords(); ++i)
 860     Count += CountPopulation_64(pVal[i]);
 861   return Count;
 862 }
 863
 864 APInt APInt::byteSwap() const {
 865   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 866   if (BitWidth == 16)
 867     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
 868   else if (BitWidth == 32)
 869     return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
 870   else if (BitWidth == 48) {
 871     unsigned Tmp1 = unsigned(VAL >> 16);
 872     Tmp1 = ByteSwap_32(Tmp1);
 873     uint16_t Tmp2 = uint16_t(VAL);
 874     Tmp2 = ByteSwap_16(Tmp2);
 875     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 876   } else if (BitWidth == 64)
 877     return APInt(BitWidth, ByteSwap_64(VAL));
 878   else {
 879     APInt Result(BitWidth, 0);
 880     char *pByte = (char*)Result.pVal;
 881     for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
 882       char Tmp = pByte[i];
 883       pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
 884       pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
 885     }
 886     return Result;
 887   }
 888 }
 889
 890 APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
 891                                             const APInt& API2) {
 892   APInt A = API1, B = API2;
 893   while (!!B) {
 894     APInt T = B;
 895     B = APIntOps::urem(A, B);
 896     A = T;
 897   }
 898   return A;
 899 }
 900
 901 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 902   union {
 903     double D;
 904     uint64_t I;
 905   } T;
 906   T.D = Double;
 907
 908   // Get the sign bit from the highest order bit
 909   bool isNeg = T.I >> 63;
 910
 911   // Get the 11-bit exponent and adjust for the 1023 bit bias
 912   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
 913
 914   // If the exponent is negative, the value is < 0 so just return 0.
 915   if (exp < 0)
 916     return APInt(width, 0u);
 917
 918   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 919   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
 920
 921   // If the exponent doesn't shift all bits out of the mantissa
 922   if (exp < 52)
 923     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 924                     APInt(width, mantissa >> (52 - exp));
 925
 926   // If the client didn't provide enough bits for us to shift the mantissa into
 927   // then the result is undefined, just return 0
 928   if (width <= exp - 52)
 929     return APInt(width, 0);
 930
 931   // Otherwise, we have to shift the mantissa bits up to the right location
 932   APInt Tmp(width, mantissa);
 933   Tmp = Tmp.shl((unsigned)exp - 52);
 934   return isNeg ? -Tmp : Tmp;
 935 }
 936
 937 /// RoundToDouble - This function converts this APInt to a double.
 938 /// The layout for double is as following (IEEE Standard 754):
 939 ///  --------------------------------------
 940 /// |  Sign    Exponent    Fraction    Bias |
 941 /// |-------------------------------------- |
 942 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 943 ///  --------------------------------------
 944 double APInt::roundToDouble(bool isSigned) const {
 945
 946   // Handle the simple case where the value is contained in one uint64_t.
 947   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 948   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 949     if (isSigned) {
 950       int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
 951       return double(sext);
 952     } else
 953       return double(getWord(0));
 954   }
 955
 956   // Determine if the value is negative.
 957   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 958
 959   // Construct the absolute value if we're negative.
 960   APInt Tmp(isNeg ? -(*this) : (*this));
 961
 962   // Figure out how many bits we're using.
 963   unsigned n = Tmp.getActiveBits();
 964
 965   // The exponent (without bias normalization) is just the number of bits
 966   // we are using. Note that the sign bit is gone since we constructed the
 967   // absolute value.
 968   uint64_t exp = n;
 969
 970   // Return infinity for exponent overflow
 971   if (exp > 1023) {
 972     if (!isSigned || !isNeg)
 973       return std::numeric_limits<double>::infinity();
 974     else
 975       return -std::numeric_limits<double>::infinity();
 976   }
 977   exp += 1023; // Increment for 1023 bias
 978
 979   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 980   // extract the high 52 bits from the correct words in pVal.
 981   uint64_t mantissa;
 982   unsigned hiWord = whichWord(n-1);
 983   if (hiWord == 0) {
 984     mantissa = Tmp.pVal[0];
 985     if (n > 52)
 986       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 987   } else {
 988     assert(hiWord > 0 && "huh?");
 989     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 990     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 991     mantissa = hibits | lobits;
 992   }
 993
 994   // The leading bit of mantissa is implicit, so get rid of it.
 995   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 996   union {
 997     double D;
 998     uint64_t I;
 999   } T;
1000   T.I = sign | (exp << 52) | mantissa;
1001   return T.D;
1002 }
1003
1004 // Truncate to new width.
1005 APInt &APInt::trunc(unsigned width) {
1006   assert(width < BitWidth && "Invalid APInt Truncate request");
1007   assert(width && "Can't truncate to 0 bits");
1008   unsigned wordsBefore = getNumWords();
1009   BitWidth = width;
1010   unsigned wordsAfter = getNumWords();
1011   if (wordsBefore != wordsAfter) {
1012     if (wordsAfter == 1) {
1013       uint64_t *tmp = pVal;
1014       VAL = pVal[0];
1015       delete [] tmp;
1016     } else {
1017       uint64_t *newVal = getClearedMemory(wordsAfter);
1018       for (unsigned i = 0; i < wordsAfter; ++i)
1019         newVal[i] = pVal[i];
1020       delete [] pVal;
1021       pVal = newVal;
1022     }
1023   }
1024   return clearUnusedBits();
1025 }
1026
1027 // Sign extend to a new width.
1028 APInt &APInt::sext(unsigned width) {
1029   assert(width > BitWidth && "Invalid APInt SignExtend request");
1030   // If the sign bit isn't set, this is the same as zext.
1031   if (!isNegative()) {
1032     zext(width);
1033     return *this;
1034   }
1035
1036   // The sign bit is set. First, get some facts
1037   unsigned wordsBefore = getNumWords();
1038   unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
1039   BitWidth = width;
1040   unsigned wordsAfter = getNumWords();
1041
1042   // Mask the high order word appropriately
1043   if (wordsBefore == wordsAfter) {
1044     unsigned newWordBits = width % APINT_BITS_PER_WORD;
1045     // The extension is contained to the wordsBefore-1th word.
1046     uint64_t mask = ~0ULL;
1047     if (newWordBits)
1048       mask >>= APINT_BITS_PER_WORD - newWordBits;
1049     mask <<= wordBits;
1050     if (wordsBefore == 1)
1051       VAL |= mask;
1052     else
1053       pVal[wordsBefore-1] |= mask;
1054     return clearUnusedBits();
1055   }
1056
1057   uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1058   uint64_t *newVal = getMemory(wordsAfter);
1059   if (wordsBefore == 1)
1060     newVal[0] = VAL | mask;
1061   else {
1062     for (unsigned i = 0; i < wordsBefore; ++i)
1063       newVal[i] = pVal[i];
1064     newVal[wordsBefore-1] |= mask;
1065   }
1066   for (unsigned i = wordsBefore; i < wordsAfter; i++)
1067     newVal[i] = -1ULL;
1068   if (wordsBefore != 1)
1069     delete [] pVal;
1070   pVal = newVal;
1071   return clearUnusedBits();
1072 }
1073
1074 //  Zero extend to a new width.
1075 APInt &APInt::zext(unsigned width) {
1076   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1077   unsigned wordsBefore = getNumWords();
1078   BitWidth = width;
1079   unsigned wordsAfter = getNumWords();
1080   if (wordsBefore != wordsAfter) {
1081     uint64_t *newVal = getClearedMemory(wordsAfter);
1082     if (wordsBefore == 1)
1083       newVal[0] = VAL;
1084     else
1085       for (unsigned i = 0; i < wordsBefore; ++i)
1086         newVal[i] = pVal[i];
1087     if (wordsBefore != 1)
1088       delete [] pVal;
1089     pVal = newVal;
1090   }
1091   return *this;
1092 }
1093
1094 APInt &APInt::zextOrTrunc(unsigned width) {
1095   if (BitWidth < width)
1096     return zext(width);
1097   if (BitWidth > width)
1098     return trunc(width);
1099   return *this;
1100 }
1101
1102 APInt &APInt::sextOrTrunc(unsigned width) {
1103   if (BitWidth < width)
1104     return sext(width);
1105   if (BitWidth > width)
1106     return trunc(width);
1107   return *this;
1108 }
1109
1110 /// Arithmetic right-shift this APInt by shiftAmt.
1111 /// @brief Arithmetic right-shift function.
1112 APInt APInt::ashr(const APInt &shiftAmt) const {
1113   return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1114 }
1115
1116 /// Arithmetic right-shift this APInt by shiftAmt.
1117 /// @brief Arithmetic right-shift function.
1118 APInt APInt::ashr(unsigned shiftAmt) const {
1119   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1120   // Handle a degenerate case
1121   if (shiftAmt == 0)
1122     return *this;
1123
1124   // Handle single word shifts with built-in ashr
1125   if (isSingleWord()) {
1126     if (shiftAmt == BitWidth)
1127       return APInt(BitWidth, 0); // undefined
1128     else {
1129       unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1130       return APInt(BitWidth,
1131         (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1132     }
1133   }
1134
1135   // If all the bits were shifted out, the result is, technically, undefined.
1136   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1137   // issues in the algorithm below.
1138   if (shiftAmt == BitWidth) {
1139     if (isNegative())
1140       return APInt(BitWidth, -1ULL, true);
1141     else
1142       return APInt(BitWidth, 0);
1143   }
1144
1145   // Create some space for the result.
1146   uint64_t * val = new uint64_t[getNumWords()];
1147
1148   // Compute some values needed by the following shift algorithms
1149   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1150   unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1151   unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1152   unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1153   if (bitsInWord == 0)
1154     bitsInWord = APINT_BITS_PER_WORD;
1155
1156   // If we are shifting whole words, just move whole words
1157   if (wordShift == 0) {
1158     // Move the words containing significant bits
1159     for (unsigned i = 0; i <= breakWord; ++i)
1160       val[i] = pVal[i+offset]; // move whole word
1161
1162     // Adjust the top significant word for sign bit fill, if negative
1163     if (isNegative())
1164       if (bitsInWord < APINT_BITS_PER_WORD)
1165         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1166   } else {
1167     // Shift the low order words
1168     for (unsigned i = 0; i < breakWord; ++i) {
1169       // This combines the shifted corresponding word with the low bits from
1170       // the next word (shifted into this word's high bits).
1171       val[i] = (pVal[i+offset] >> wordShift) |
1172                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1173     }
1174
1175     // Shift the break word. In this case there are no bits from the next word
1176     // to include in this word.
1177     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1178
1179     // Deal with sign extenstion in the break word, and possibly the word before
1180     // it.
1181     if (isNegative()) {
1182       if (wordShift > bitsInWord) {
1183         if (breakWord > 0)
1184           val[breakWord-1] |=
1185             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1186         val[breakWord] |= ~0ULL;
1187       } else
1188         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1189     }
1190   }
1191
1192   // Remaining words are 0 or -1, just assign them.
1193   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1194   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1195     val[i] = fillValue;
1196   return APInt(val, BitWidth).clearUnusedBits();
1197 }
1198
1199 /// Logical right-shift this APInt by shiftAmt.
1200 /// @brief Logical right-shift function.
1201 APInt APInt::lshr(const APInt &shiftAmt) const {
1202   return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1203 }
1204
1205 /// Logical right-shift this APInt by shiftAmt.
1206 /// @brief Logical right-shift function.
1207 APInt APInt::lshr(unsigned shiftAmt) const {
1208   if (isSingleWord()) {
1209     if (shiftAmt == BitWidth)
1210       return APInt(BitWidth, 0);
1211     else
1212       return APInt(BitWidth, this->VAL >> shiftAmt);
1213   }
1214
1215   // If all the bits were shifted out, the result is 0. This avoids issues
1216   // with shifting by the size of the integer type, which produces undefined
1217   // results. We define these "undefined results" to always be 0.
1218   if (shiftAmt == BitWidth)
1219     return APInt(BitWidth, 0);
1220
1221   // If none of the bits are shifted out, the result is *this. This avoids
1222   // issues with shifting by the size of the integer type, which produces
1223   // undefined results in the code below. This is also an optimization.
1224   if (shiftAmt == 0)
1225     return *this;
1226
1227   // Create some space for the result.
1228   uint64_t * val = new uint64_t[getNumWords()];
1229
1230   // If we are shifting less than a word, compute the shift with a simple carry
1231   if (shiftAmt < APINT_BITS_PER_WORD) {
1232     uint64_t carry = 0;
1233     for (int i = getNumWords()-1; i >= 0; --i) {
1234       val[i] = (pVal[i] >> shiftAmt) | carry;
1235       carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1236     }
1237     return APInt(val, BitWidth).clearUnusedBits();
1238   }
1239
1240   // Compute some values needed by the remaining shift algorithms
1241   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1242   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1243
1244   // If we are shifting whole words, just move whole words
1245   if (wordShift == 0) {
1246     for (unsigned i = 0; i < getNumWords() - offset; ++i)
1247       val[i] = pVal[i+offset];
1248     for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1249       val[i] = 0;
1250     return APInt(val,BitWidth).clearUnusedBits();
1251   }
1252
1253   // Shift the low order words
1254   unsigned breakWord = getNumWords() - offset -1;
1255   for (unsigned i = 0; i < breakWord; ++i)
1256     val[i] = (pVal[i+offset] >> wordShift) |
1257              (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1258   // Shift the break word.
1259   val[breakWord] = pVal[breakWord+offset] >> wordShift;
1260
1261   // Remaining words are 0
1262   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1263     val[i] = 0;
1264   return APInt(val, BitWidth).clearUnusedBits();
1265 }
1266
1267 /// Left-shift this APInt by shiftAmt.
1268 /// @brief Left-shift function.
1269 APInt APInt::shl(const APInt &shiftAmt) const {
1270   // It's undefined behavior in C to shift by BitWidth or greater.
1271   return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1272 }
1273
1274 APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1275   // If all the bits were shifted out, the result is 0. This avoids issues
1276   // with shifting by the size of the integer type, which produces undefined
1277   // results. We define these "undefined results" to always be 0.
1278   if (shiftAmt == BitWidth)
1279     return APInt(BitWidth, 0);
1280
1281   // If none of the bits are shifted out, the result is *this. This avoids a
1282   // lshr by the words size in the loop below which can produce incorrect
1283   // results. It also avoids the expensive computation below for a common case.
1284   if (shiftAmt == 0)
1285     return *this;
1286
1287   // Create some space for the result.
1288   uint64_t * val = new uint64_t[getNumWords()];
1289
1290   // If we are shifting less than a word, do it the easy way
1291   if (shiftAmt < APINT_BITS_PER_WORD) {
1292     uint64_t carry = 0;
1293     for (unsigned i = 0; i < getNumWords(); i++) {
1294       val[i] = pVal[i] << shiftAmt | carry;
1295       carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1296     }
1297     return APInt(val, BitWidth).clearUnusedBits();
1298   }
1299
1300   // Compute some values needed by the remaining shift algorithms
1301   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1302   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1303
1304   // If we are shifting whole words, just move whole words
1305   if (wordShift == 0) {
1306     for (unsigned i = 0; i < offset; i++)
1307       val[i] = 0;
1308     for (unsigned i = offset; i < getNumWords(); i++)
1309       val[i] = pVal[i-offset];
1310     return APInt(val,BitWidth).clearUnusedBits();
1311   }
1312
1313   // Copy whole words from this to Result.
1314   unsigned i = getNumWords() - 1;
1315   for (; i > offset; --i)
1316     val[i] = pVal[i-offset] << wordShift |
1317              pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1318   val[offset] = pVal[0] << wordShift;
1319   for (i = 0; i < offset; ++i)
1320     val[i] = 0;
1321   return APInt(val, BitWidth).clearUnusedBits();
1322 }
1323
1324 APInt APInt::rotl(const APInt &rotateAmt) const {
1325   return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1326 }
1327
1328 APInt APInt::rotl(unsigned rotateAmt) const {
1329   if (rotateAmt == 0)
1330     return *this;
1331   // Don't get too fancy, just use existing shift/or facilities
1332   APInt hi(*this);
1333   APInt lo(*this);
1334   hi.shl(rotateAmt);
1335   lo.lshr(BitWidth - rotateAmt);
1336   return hi | lo;
1337 }
1338
1339 APInt APInt::rotr(const APInt &rotateAmt) const {
1340   return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1341 }
1342
1343 APInt APInt::rotr(unsigned rotateAmt) const {
1344   if (rotateAmt == 0)
1345     return *this;
1346   // Don't get too fancy, just use existing shift/or facilities
1347   APInt hi(*this);
1348   APInt lo(*this);
1349   lo.lshr(rotateAmt);
1350   hi.shl(BitWidth - rotateAmt);
1351   return hi | lo;
1352 }
1353
1354 // Square Root - this method computes and returns the square root of "this".
1355 // Three mechanisms are used for computation. For small values (<= 5 bits),
1356 // a table lookup is done. This gets some performance for common cases. For
1357 // values using less than 52 bits, the value is converted to double and then
1358 // the libc sqrt function is called. The result is rounded and then converted
1359 // back to a uint64_t which is then used to construct the result. Finally,
1360 // the Babylonian method for computing square roots is used.
1361 APInt APInt::sqrt() const {
1362
1363   // Determine the magnitude of the value.
1364   unsigned magnitude = getActiveBits();
1365
1366   // Use a fast table for some small values. This also gets rid of some
1367   // rounding errors in libc sqrt for small values.
1368   if (magnitude <= 5) {
1369     static const uint8_t results[32] = {
1370       /*     0 */ 0,
1371       /*  1- 2 */ 1, 1,
1372       /*  3- 6 */ 2, 2, 2, 2,
1373       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1374       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1375       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1376       /*    31 */ 6
1377     };
1378     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1379   }
1380
1381   // If the magnitude of the value fits in less than 52 bits (the precision of
1382   // an IEEE double precision floating point value), then we can use the
1383   // libc sqrt function which will probably use a hardware sqrt computation.
1384   // This should be faster than the algorithm below.
1385   if (magnitude < 52) {
1386 #ifdef _MSC_VER
1387     // Amazingly, VC++ doesn't have round().
1388     return APInt(BitWidth,
1389                  uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1390 #else
1391     return APInt(BitWidth,
1392                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1393 #endif
1394   }
1395
1396   // Okay, all the short cuts are exhausted. We must compute it. The following
1397   // is a classical Babylonian method for computing the square root. This code
1398   // was adapted to APINt from a wikipedia article on such computations.
1399   // See http://www.wikipedia.org/ and go to the page named
1400   // Calculate_an_integer_square_root.
1401   unsigned nbits = BitWidth, i = 4;
1402   APInt testy(BitWidth, 16);
1403   APInt x_old(BitWidth, 1);
1404   APInt x_new(BitWidth, 0);
1405   APInt two(BitWidth, 2);
1406
1407   // Select a good starting value using binary logarithms.
1408   for (;; i += 2, testy = testy.shl(2))
1409     if (i >= nbits || this->ule(testy)) {
1410       x_old = x_old.shl(i / 2);
1411       break;
1412     }
1413
1414   // Use the Babylonian method to arrive at the integer square root:
1415   for (;;) {
1416     x_new = (this->udiv(x_old) + x_old).udiv(two);
1417     if (x_old.ule(x_new))
1418       break;
1419     x_old = x_new;
1420   }
1421
1422   // Make sure we return the closest approximation
1423   // NOTE: The rounding calculation below is correct. It will produce an
1424   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1425   // determined to be a rounding issue with pari/gp as it begins to use a
1426   // floating point representation after 192 bits. There are no discrepancies
1427   // between this algorithm and pari/gp for bit widths < 192 bits.
1428   APInt square(x_old * x_old);
1429   APInt nextSquare((x_old + 1) * (x_old +1));
1430   if (this->ult(square))
1431     return x_old;
1432   else if (this->ule(nextSquare)) {
1433     APInt midpoint((nextSquare - square).udiv(two));
1434     APInt offset(*this - square);
1435     if (offset.ult(midpoint))
1436       return x_old;
1437     else
1438       return x_old + 1;
1439   } else
1440     llvm_unreachable("Error in APInt::sqrt computation");
1441   return x_old + 1;
1442 }
1443
1444 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1445 /// iterative extended Euclidean algorithm is used to solve for this value,
1446 /// however we simplify it to speed up calculating only the inverse, and take
1447 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1448 /// (potentially large) APInts around.
1449 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1450   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1451
1452   // Using the properties listed at the following web page (accessed 06/21/08):
1453   //   http://www.numbertheory.org/php/euclid.html
1454   // (especially the properties numbered 3, 4 and 9) it can be proved that
1455   // BitWidth bits suffice for all the computations in the algorithm implemented
1456   // below. More precisely, this number of bits suffice if the multiplicative
1457   // inverse exists, but may not suffice for the general extended Euclidean
1458   // algorithm.
1459
1460   APInt r[2] = { modulo, *this };
1461   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1462   APInt q(BitWidth, 0);
1463
1464   unsigned i;
1465   for (i = 0; r[i^1] != 0; i ^= 1) {
1466     // An overview of the math without the confusing bit-flipping:
1467     // q = r[i-2] / r[i-1]
1468     // r[i] = r[i-2] % r[i-1]
1469     // t[i] = t[i-2] - t[i-1] * q
1470     udivrem(r[i], r[i^1], q, r[i]);
1471     t[i] -= t[i^1] * q;
1472   }
1473
1474   // If this APInt and the modulo are not coprime, there is no multiplicative
1475   // inverse, so return 0. We check this by looking at the next-to-last
1476   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1477   // algorithm.
1478   if (r[i] != 1)
1479     return APInt(BitWidth, 0);
1480
1481   // The next-to-last t is the multiplicative inverse.  However, we are
1482   // interested in a positive inverse. Calcuate a positive one from a negative
1483   // one if necessary. A simple addition of the modulo suffices because
1484   // abs(t[i]) is known to be less than *this/2 (see the link above).
1485   return t[i].isNegative() ? t[i] + modulo : t[i];
1486 }
1487
1488 /// Calculate the magic numbers required to implement a signed integer division
1489 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1490 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1491 /// Warren, Jr., chapter 10.
1492 APInt::ms APInt::magic() const {
1493   const APInt& d = *this;
1494   unsigned p;
1495   APInt ad, anc, delta, q1, r1, q2, r2, t;
1496   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1497   struct ms mag;
1498
1499   ad = d.abs();
1500   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1501   anc = t - 1 - t.urem(ad);   // absolute value of nc
1502   p = d.getBitWidth() - 1;    // initialize p
1503   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1504   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1505   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1506   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1507   do {
1508     p = p + 1;
1509     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1510     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1511     if (r1.uge(anc)) {  // must be unsigned comparison
1512       q1 = q1 + 1;
1513       r1 = r1 - anc;
1514     }
1515     q2 = q2<<1;          // update q2 = 2p/abs(d)
1516     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1517     if (r2.uge(ad)) {   // must be unsigned comparison
1518       q2 = q2 + 1;
1519       r2 = r2 - ad;
1520     }
1521     delta = ad - r2;
1522   } while (q1.ule(delta) || (q1 == delta && r1 == 0));
1523
1524   mag.m = q2 + 1;
1525   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1526   mag.s = p - d.getBitWidth();          // resulting shift
1527   return mag;
1528 }
1529
1530 /// Calculate the magic numbers required to implement an unsigned integer
1531 /// division by a constant as a sequence of multiplies, adds and shifts.
1532 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1533 /// S. Warren, Jr., chapter 10.
1534 APInt::mu APInt::magicu() const {
1535   const APInt& d = *this;
1536   unsigned p;
1537   APInt nc, delta, q1, r1, q2, r2;
1538   struct mu magu;
1539   magu.a = 0;               // initialize "add" indicator
1540   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1541   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1542   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1543
1544   nc = allOnes - (-d).urem(d);
1545   p = d.getBitWidth() - 1;  // initialize p
1546   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1547   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1548   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1549   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1550   do {
1551     p = p + 1;
1552     if (r1.uge(nc - r1)) {
1553       q1 = q1 + q1 + 1;  // update q1
1554       r1 = r1 + r1 - nc; // update r1
1555     }
1556     else {
1557       q1 = q1+q1; // update q1
1558       r1 = r1+r1; // update r1
1559     }
1560     if ((r2 + 1).uge(d - r2)) {
1561       if (q2.uge(signedMax)) magu.a = 1;
1562       q2 = q2+q2 + 1;     // update q2
1563       r2 = r2+r2 + 1 - d; // update r2
1564     }
1565     else {
1566       if (q2.uge(signedMin)) magu.a = 1;
1567       q2 = q2+q2;     // update q2
1568       r2 = r2+r2 + 1; // update r2
1569     }
1570     delta = d - 1 - r2;
1571   } while (p < d.getBitWidth()*2 &&
1572            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1573   magu.m = q2 + 1; // resulting magic number
1574   magu.s = p - d.getBitWidth();  // resulting shift
1575   return magu;
1576 }
1577
1578 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1579 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1580 /// variables here have the same names as in the algorithm. Comments explain
1581 /// the algorithm and any deviation from it.
1582 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1583                      unsigned m, unsigned n) {
1584   assert(u && "Must provide dividend");
1585   assert(v && "Must provide divisor");
1586   assert(q && "Must provide quotient");
1587   assert(u != v && u != q && v != q && "Must us different memory");
1588   assert(n>1 && "n must be > 1");
1589
1590   // Knuth uses the value b as the base of the number system. In our case b
1591   // is 2^31 so we just set it to -1u.
1592   uint64_t b = uint64_t(1) << 32;
1593
1594 #if 0
1595   DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1596   DEBUG(dbgs() << "KnuthDiv: original:");
1597   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1598   DEBUG(dbgs() << " by");
1599   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1600   DEBUG(dbgs() << '\n');
1601 #endif
1602   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1603   // u and v by d. Note that we have taken Knuth's advice here to use a power
1604   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1605   // 2 allows us to shift instead of multiply and it is easy to determine the
1606   // shift amount from the leading zeros.  We are basically normalizing the u
1607   // and v so that its high bits are shifted to the top of v's range without
1608   // overflow. Note that this can require an extra word in u so that u must
1609   // be of length m+n+1.
1610   unsigned shift = CountLeadingZeros_32(v[n-1]);
1611   unsigned v_carry = 0;
1612   unsigned u_carry = 0;
1613   if (shift) {
1614     for (unsigned i = 0; i < m+n; ++i) {
1615       unsigned u_tmp = u[i] >> (32 - shift);
1616       u[i] = (u[i] << shift) | u_carry;
1617       u_carry = u_tmp;
1618     }
1619     for (unsigned i = 0; i < n; ++i) {
1620       unsigned v_tmp = v[i] >> (32 - shift);
1621       v[i] = (v[i] << shift) | v_carry;
1622       v_carry = v_tmp;
1623     }
1624   }
1625   u[m+n] = u_carry;
1626 #if 0
1627   DEBUG(dbgs() << "KnuthDiv:   normal:");
1628   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1629   DEBUG(dbgs() << " by");
1630   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1631   DEBUG(dbgs() << '\n');
1632 #endif
1633
1634   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1635   int j = m;
1636   do {
1637     DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1638     // D3. [Calculate q'.].
1639     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1640     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1641     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1642     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1643     // on v[n-2] determines at high speed most of the cases in which the trial
1644     // value qp is one too large, and it eliminates all cases where qp is two
1645     // too large.
1646     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1647     DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1648     uint64_t qp = dividend / v[n-1];
1649     uint64_t rp = dividend % v[n-1];
1650     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1651       qp--;
1652       rp += v[n-1];
1653       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1654         qp--;
1655     }
1656     DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1657
1658     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1659     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1660     // consists of a simple multiplication by a one-place number, combined with
1661     // a subtraction.
1662     bool isNeg = false;
1663     for (unsigned i = 0; i < n; ++i) {
1664       uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1665       uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1666       bool borrow = subtrahend > u_tmp;
1667       DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
1668                    << ", subtrahend == " << subtrahend
1669                    << ", borrow = " << borrow << '\n');
1670
1671       uint64_t result = u_tmp - subtrahend;
1672       unsigned k = j + i;
1673       u[k++] = (unsigned)(result & (b-1)); // subtract low word
1674       u[k++] = (unsigned)(result >> 32);   // subtract high word
1675       while (borrow && k <= m+n) { // deal with borrow to the left
1676         borrow = u[k] == 0;
1677         u[k]--;
1678         k++;
1679       }
1680       isNeg |= borrow;
1681       DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1682                     u[j+i+1] << '\n');
1683     }
1684     DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1685     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1686     DEBUG(dbgs() << '\n');
1687     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1688     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1689     // true value plus b**(n+1), namely as the b's complement of
1690     // the true value, and a "borrow" to the left should be remembered.
1691     //
1692     if (isNeg) {
1693       bool carry = true;  // true because b's complement is "complement + 1"
1694       for (unsigned i = 0; i <= m+n; ++i) {
1695         u[i] = ~u[i] + carry; // b's complement
1696         carry = carry && u[i] == 0;
1697       }
1698     }
1699     DEBUG(dbgs() << "KnuthDiv: after complement:");
1700     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1701     DEBUG(dbgs() << '\n');
1702
1703     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1704     // negative, go to step D6; otherwise go on to step D7.
1705     q[j] = (unsigned)qp;
1706     if (isNeg) {
1707       // D6. [Add back]. The probability that this step is necessary is very
1708       // small, on the order of only 2/b. Make sure that test data accounts for
1709       // this possibility. Decrease q[j] by 1
1710       q[j]--;
1711       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1712       // A carry will occur to the left of u[j+n], and it should be ignored
1713       // since it cancels with the borrow that occurred in D4.
1714       bool carry = false;
1715       for (unsigned i = 0; i < n; i++) {
1716         unsigned limit = std::min(u[j+i],v[i]);
1717         u[j+i] += v[i] + carry;
1718         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1719       }
1720       u[j+n] += carry;
1721     }
1722     DEBUG(dbgs() << "KnuthDiv: after correction:");
1723     DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
1724     DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1725
1726   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1727   } while (--j >= 0);
1728
1729   DEBUG(dbgs() << "KnuthDiv: quotient:");
1730   DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1731   DEBUG(dbgs() << '\n');
1732
1733   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1734   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1735   // compute the remainder (urem uses this).
1736   if (r) {
1737     // The value d is expressed by the "shift" value above since we avoided
1738     // multiplication by d by using a shift left. So, all we have to do is
1739     // shift right here. In order to mak
1740     if (shift) {
1741       unsigned carry = 0;
1742       DEBUG(dbgs() << "KnuthDiv: remainder:");
1743       for (int i = n-1; i >= 0; i--) {
1744         r[i] = (u[i] >> shift) | carry;
1745         carry = u[i] << (32 - shift);
1746         DEBUG(dbgs() << " " << r[i]);
1747       }
1748     } else {
1749       for (int i = n-1; i >= 0; i--) {
1750         r[i] = u[i];
1751         DEBUG(dbgs() << " " << r[i]);
1752       }
1753     }
1754     DEBUG(dbgs() << '\n');
1755   }
1756 #if 0
1757   DEBUG(dbgs() << '\n');
1758 #endif
1759 }
1760
1761 void APInt::divide(const APInt LHS, unsigned lhsWords,
1762                    const APInt &RHS, unsigned rhsWords,
1763                    APInt *Quotient, APInt *Remainder)
1764 {
1765   assert(lhsWords >= rhsWords && "Fractional result");
1766
1767   // First, compose the values into an array of 32-bit words instead of
1768   // 64-bit words. This is a necessity of both the "short division" algorithm
1769   // and the Knuth "classical algorithm" which requires there to be native
1770   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1771   // can't use 64-bit operands here because we don't have native results of
1772   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1773   // work on large-endian machines.
1774   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1775   unsigned n = rhsWords * 2;
1776   unsigned m = (lhsWords * 2) - n;
1777
1778   // Allocate space for the temporary values we need either on the stack, if
1779   // it will fit, or on the heap if it won't.
1780   unsigned SPACE[128];
1781   unsigned *U = 0;
1782   unsigned *V = 0;
1783   unsigned *Q = 0;
1784   unsigned *R = 0;
1785   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1786     U = &SPACE[0];
1787     V = &SPACE[m+n+1];
1788     Q = &SPACE[(m+n+1) + n];
1789     if (Remainder)
1790       R = &SPACE[(m+n+1) + n + (m+n)];
1791   } else {
1792     U = new unsigned[m + n + 1];
1793     V = new unsigned[n];
1794     Q = new unsigned[m+n];
1795     if (Remainder)
1796       R = new unsigned[n];
1797   }
1798
1799   // Initialize the dividend
1800   memset(U, 0, (m+n+1)*sizeof(unsigned));
1801   for (unsigned i = 0; i < lhsWords; ++i) {
1802     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1803     U[i * 2] = (unsigned)(tmp & mask);
1804     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1805   }
1806   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1807
1808   // Initialize the divisor
1809   memset(V, 0, (n)*sizeof(unsigned));
1810   for (unsigned i = 0; i < rhsWords; ++i) {
1811     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1812     V[i * 2] = (unsigned)(tmp & mask);
1813     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1814   }
1815
1816   // initialize the quotient and remainder
1817   memset(Q, 0, (m+n) * sizeof(unsigned));
1818   if (Remainder)
1819     memset(R, 0, n * sizeof(unsigned));
1820
1821   // Now, adjust m and n for the Knuth division. n is the number of words in
1822   // the divisor. m is the number of words by which the dividend exceeds the
1823   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1824   // contain any zero words or the Knuth algorithm fails.
1825   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1826     n--;
1827     m++;
1828   }
1829   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1830     m--;
1831
1832   // If we're left with only a single word for the divisor, Knuth doesn't work
1833   // so we implement the short division algorithm here. This is much simpler
1834   // and faster because we are certain that we can divide a 64-bit quantity
1835   // by a 32-bit quantity at hardware speed and short division is simply a
1836   // series of such operations. This is just like doing short division but we
1837   // are using base 2^32 instead of base 10.
1838   assert(n != 0 && "Divide by zero?");
1839   if (n == 1) {
1840     unsigned divisor = V[0];
1841     unsigned remainder = 0;
1842     for (int i = m+n-1; i >= 0; i--) {
1843       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1844       if (partial_dividend == 0) {
1845         Q[i] = 0;
1846         remainder = 0;
1847       } else if (partial_dividend < divisor) {
1848         Q[i] = 0;
1849         remainder = (unsigned)partial_dividend;
1850       } else if (partial_dividend == divisor) {
1851         Q[i] = 1;
1852         remainder = 0;
1853       } else {
1854         Q[i] = (unsigned)(partial_dividend / divisor);
1855         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1856       }
1857     }
1858     if (R)
1859       R[0] = remainder;
1860   } else {
1861     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1862     // case n > 1.
1863     KnuthDiv(U, V, Q, R, m, n);
1864   }
1865
1866   // If the caller wants the quotient
1867   if (Quotient) {
1868     // Set up the Quotient value's memory.
1869     if (Quotient->BitWidth != LHS.BitWidth) {
1870       if (Quotient->isSingleWord())
1871         Quotient->VAL = 0;
1872       else
1873         delete [] Quotient->pVal;
1874       Quotient->BitWidth = LHS.BitWidth;
1875       if (!Quotient->isSingleWord())
1876         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1877     } else
1878       Quotient->clear();
1879
1880     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1881     // order words.
1882     if (lhsWords == 1) {
1883       uint64_t tmp =
1884         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1885       if (Quotient->isSingleWord())
1886         Quotient->VAL = tmp;
1887       else
1888         Quotient->pVal[0] = tmp;
1889     } else {
1890       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1891       for (unsigned i = 0; i < lhsWords; ++i)
1892         Quotient->pVal[i] =
1893           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1894     }
1895   }
1896
1897   // If the caller wants the remainder
1898   if (Remainder) {
1899     // Set up the Remainder value's memory.
1900     if (Remainder->BitWidth != RHS.BitWidth) {
1901       if (Remainder->isSingleWord())
1902         Remainder->VAL = 0;
1903       else
1904         delete [] Remainder->pVal;
1905       Remainder->BitWidth = RHS.BitWidth;
1906       if (!Remainder->isSingleWord())
1907         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1908     } else
1909       Remainder->clear();
1910
1911     // The remainder is in R. Reconstitute the remainder into Remainder's low
1912     // order words.
1913     if (rhsWords == 1) {
1914       uint64_t tmp =
1915         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1916       if (Remainder->isSingleWord())
1917         Remainder->VAL = tmp;
1918       else
1919         Remainder->pVal[0] = tmp;
1920     } else {
1921       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1922       for (unsigned i = 0; i < rhsWords; ++i)
1923         Remainder->pVal[i] =
1924           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1925     }
1926   }
1927
1928   // Clean up the memory we allocated.
1929   if (U != &SPACE[0]) {
1930     delete [] U;
1931     delete [] V;
1932     delete [] Q;
1933     delete [] R;
1934   }
1935 }
1936
1937 APInt APInt::udiv(const APInt& RHS) const {
1938   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1939
1940   // First, deal with the easy case
1941   if (isSingleWord()) {
1942     assert(RHS.VAL != 0 && "Divide by zero?");
1943     return APInt(BitWidth, VAL / RHS.VAL);
1944   }
1945
1946   // Get some facts about the LHS and RHS number of bits and words
1947   unsigned rhsBits = RHS.getActiveBits();
1948   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1949   assert(rhsWords && "Divided by zero???");
1950   unsigned lhsBits = this->getActiveBits();
1951   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1952
1953   // Deal with some degenerate cases
1954   if (!lhsWords)
1955     // 0 / X ===> 0
1956     return APInt(BitWidth, 0);
1957   else if (lhsWords < rhsWords || this->ult(RHS)) {
1958     // X / Y ===> 0, iff X < Y
1959     return APInt(BitWidth, 0);
1960   } else if (*this == RHS) {
1961     // X / X ===> 1
1962     return APInt(BitWidth, 1);
1963   } else if (lhsWords == 1 && rhsWords == 1) {
1964     // All high words are zero, just use native divide
1965     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1966   }
1967
1968   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1969   APInt Quotient(1,0); // to hold result.
1970   divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1971   return Quotient;
1972 }
1973
1974 APInt APInt::urem(const APInt& RHS) const {
1975   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1976   if (isSingleWord()) {
1977     assert(RHS.VAL != 0 && "Remainder by zero?");
1978     return APInt(BitWidth, VAL % RHS.VAL);
1979   }
1980
1981   // Get some facts about the LHS
1982   unsigned lhsBits = getActiveBits();
1983   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1984
1985   // Get some facts about the RHS
1986   unsigned rhsBits = RHS.getActiveBits();
1987   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1988   assert(rhsWords && "Performing remainder operation by zero ???");
1989
1990   // Check the degenerate cases
1991   if (lhsWords == 0) {
1992     // 0 % Y ===> 0
1993     return APInt(BitWidth, 0);
1994   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1995     // X % Y ===> X, iff X < Y
1996     return *this;
1997   } else if (*this == RHS) {
1998     // X % X == 0;
1999     return APInt(BitWidth, 0);
2000   } else if (lhsWords == 1) {
2001     // All high words are zero, just use native remainder
2002     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
2003   }
2004
2005   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
2006   APInt Remainder(1,0);
2007   divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
2008   return Remainder;
2009 }
2010
2011 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
2012                     APInt &Quotient, APInt &Remainder) {
2013   // Get some size facts about the dividend and divisor
2014   unsigned lhsBits  = LHS.getActiveBits();
2015   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2016   unsigned rhsBits  = RHS.getActiveBits();
2017   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2018
2019   // Check the degenerate cases
2020   if (lhsWords == 0) {
2021     Quotient = 0;                // 0 / Y ===> 0
2022     Remainder = 0;               // 0 % Y ===> 0
2023     return;
2024   }
2025
2026   if (lhsWords < rhsWords || LHS.ult(RHS)) {
2027     Remainder = LHS;            // X % Y ===> X, iff X < Y
2028     Quotient = 0;               // X / Y ===> 0, iff X < Y
2029     return;
2030   }
2031
2032   if (LHS == RHS) {
2033     Quotient  = 1;              // X / X ===> 1
2034     Remainder = 0;              // X % X ===> 0;
2035     return;
2036   }
2037
2038   if (lhsWords == 1 && rhsWords == 1) {
2039     // There is only one word to consider so use the native versions.
2040     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2041     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2042     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2043     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2044     return;
2045   }
2046
2047   // Okay, lets do it the long way
2048   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2049 }
2050
2051 void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
2052   // Check our assumptions here
2053   assert(!str.empty() && "Invalid string length");
2054   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2055          "Radix should be 2, 8, 10, or 16!");
2056
2057   StringRef::iterator p = str.begin();
2058   size_t slen = str.size();
2059   bool isNeg = *p == '-';
2060   if (*p == '-' || *p == '+') {
2061     p++;
2062     slen--;
2063     assert(slen && "String is only a sign, needs a value.");
2064   }
2065   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2066   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2067   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2068   assert((((slen-1)*64)/22 <= numbits || radix != 10)
2069          && "Insufficient bit width");
2070
2071   // Allocate memory
2072   if (!isSingleWord())
2073     pVal = getClearedMemory(getNumWords());
2074
2075   // Figure out if we can shift instead of multiply
2076   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2077
2078   // Set up an APInt for the digit to add outside the loop so we don't
2079   // constantly construct/destruct it.
2080   APInt apdigit(getBitWidth(), 0);
2081   APInt apradix(getBitWidth(), radix);
2082
2083   // Enter digit traversal loop
2084   for (StringRef::iterator e = str.end(); p != e; ++p) {
2085     unsigned digit = getDigit(*p, radix);
2086     assert(digit < radix && "Invalid character in digit string");
2087
2088     // Shift or multiply the value by the radix
2089     if (slen > 1) {
2090       if (shift)
2091         *this <<= shift;
2092       else
2093         *this *= apradix;
2094     }
2095
2096     // Add in the digit we just interpreted
2097     if (apdigit.isSingleWord())
2098       apdigit.VAL = digit;
2099     else
2100       apdigit.pVal[0] = digit;
2101     *this += apdigit;
2102   }
2103   // If its negative, put it in two's complement form
2104   if (isNeg) {
2105     (*this)--;
2106     this->flip();
2107   }
2108 }
2109
2110 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2111                      bool Signed) const {
2112   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2113          "Radix should be 2, 8, 10, or 16!");
2114
2115   // First, check for a zero value and just short circuit the logic below.
2116   if (*this == 0) {
2117     Str.push_back('0');
2118     return;
2119   }
2120
2121   static const char Digits[] = "0123456789ABCDEF";
2122
2123   if (isSingleWord()) {
2124     char Buffer[65];
2125     char *BufPtr = Buffer+65;
2126
2127     uint64_t N;
2128     if (Signed) {
2129       int64_t I = getSExtValue();
2130       if (I < 0) {
2131         Str.push_back('-');
2132         I = -I;
2133       }
2134       N = I;
2135     } else {
2136       N = getZExtValue();
2137     }
2138
2139     while (N) {
2140       *--BufPtr = Digits[N % Radix];
2141       N /= Radix;
2142     }
2143     Str.append(BufPtr, Buffer+65);
2144     return;
2145   }
2146
2147   APInt Tmp(*this);
2148
2149   if (Signed && isNegative()) {
2150     // They want to print the signed version and it is a negative value
2151     // Flip the bits and add one to turn it into the equivalent positive
2152     // value and put a '-' in the result.
2153     Tmp.flip();
2154     Tmp++;
2155     Str.push_back('-');
2156   }
2157
2158   // We insert the digits backward, then reverse them to get the right order.
2159   unsigned StartDig = Str.size();
2160
2161   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2162   // because the number of bits per digit (1, 3 and 4 respectively) divides
2163   // equaly.  We just shift until the value is zero.
2164   if (Radix != 10) {
2165     // Just shift tmp right for each digit width until it becomes zero
2166     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2167     unsigned MaskAmt = Radix - 1;
2168
2169     while (Tmp != 0) {
2170       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2171       Str.push_back(Digits[Digit]);
2172       Tmp = Tmp.lshr(ShiftAmt);
2173     }
2174   } else {
2175     APInt divisor(4, 10);
2176     while (Tmp != 0) {
2177       APInt APdigit(1, 0);
2178       APInt tmp2(Tmp.getBitWidth(), 0);
2179       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2180              &APdigit);
2181       unsigned Digit = (unsigned)APdigit.getZExtValue();
2182       assert(Digit < Radix && "divide failed");
2183       Str.push_back(Digits[Digit]);
2184       Tmp = tmp2;
2185     }
2186   }
2187
2188   // Reverse the digits before returning.
2189   std::reverse(Str.begin()+StartDig, Str.end());
2190 }
2191
2192 /// toString - This returns the APInt as a std::string.  Note that this is an
2193 /// inefficient method.  It is better to pass in a SmallVector/SmallString
2194 /// to the methods above.
2195 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2196   SmallString<40> S;
2197   toString(S, Radix, Signed);
2198   return S.str();
2199 }
2200
2201
2202 void APInt::dump() const {
2203   SmallString<40> S, U;
2204   this->toStringUnsigned(U);
2205   this->toStringSigned(S);
2206   dbgs() << "APInt(" << BitWidth << "b, "
2207          << U.str() << "u " << S.str() << "s)";
2208 }
2209
2210 void APInt::print(raw_ostream &OS, bool isSigned) const {
2211   SmallString<40> S;
2212   this->toString(S, 10, isSigned);
2213   OS << S.str();
2214 }
2215
2216 // This implements a variety of operations on a representation of
2217 // arbitrary precision, two's-complement, bignum integer values.
2218
2219 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2220 // and unrestricting assumption.
2221 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2222 COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2223
2224 /* Some handy functions local to this file.  */
2225 namespace {
2226
2227   /* Returns the integer part with the least significant BITS set.
2228      BITS cannot be zero.  */
2229   static inline integerPart
2230   lowBitMask(unsigned int bits)
2231   {
2232     assert (bits != 0 && bits <= integerPartWidth);
2233
2234     return ~(integerPart) 0 >> (integerPartWidth - bits);
2235   }
2236
2237   /* Returns the value of the lower half of PART.  */
2238   static inline integerPart
2239   lowHalf(integerPart part)
2240   {
2241     return part & lowBitMask(integerPartWidth / 2);
2242   }
2243
2244   /* Returns the value of the upper half of PART.  */
2245   static inline integerPart
2246   highHalf(integerPart part)
2247   {
2248     return part >> (integerPartWidth / 2);
2249   }
2250
2251   /* Returns the bit number of the most significant set bit of a part.
2252      If the input number has no bits set -1U is returned.  */
2253   static unsigned int
2254   partMSB(integerPart value)
2255   {
2256     unsigned int n, msb;
2257
2258     if (value == 0)
2259       return -1U;
2260
2261     n = integerPartWidth / 2;
2262
2263     msb = 0;
2264     do {
2265       if (value >> n) {
2266         value >>= n;
2267         msb += n;
2268       }
2269
2270       n >>= 1;
2271     } while (n);
2272
2273     return msb;
2274   }
2275
2276   /* Returns the bit number of the least significant set bit of a
2277      part.  If the input number has no bits set -1U is returned.  */
2278   static unsigned int
2279   partLSB(integerPart value)
2280   {
2281     unsigned int n, lsb;
2282
2283     if (value == 0)
2284       return -1U;
2285
2286     lsb = integerPartWidth - 1;
2287     n = integerPartWidth / 2;
2288
2289     do {
2290       if (value << n) {
2291         value <<= n;
2292         lsb -= n;
2293       }
2294
2295       n >>= 1;
2296     } while (n);
2297
2298     return lsb;
2299   }
2300 }
2301
2302 /* Sets the least significant part of a bignum to the input value, and
2303    zeroes out higher parts.  */
2304 void
2305 APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2306 {
2307   unsigned int i;
2308
2309   assert (parts > 0);
2310
2311   dst[0] = part;
2312   for(i = 1; i < parts; i++)
2313     dst[i] = 0;
2314 }
2315
2316 /* Assign one bignum to another.  */
2317 void
2318 APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2319 {
2320   unsigned int i;
2321
2322   for(i = 0; i < parts; i++)
2323     dst[i] = src[i];
2324 }
2325
2326 /* Returns true if a bignum is zero, false otherwise.  */
2327 bool
2328 APInt::tcIsZero(const integerPart *src, unsigned int parts)
2329 {
2330   unsigned int i;
2331
2332   for(i = 0; i < parts; i++)
2333     if (src[i])
2334       return false;
2335
2336   return true;
2337 }
2338
2339 /* Extract the given bit of a bignum; returns 0 or 1.  */
2340 int
2341 APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2342 {
2343   return(parts[bit / integerPartWidth]
2344          & ((integerPart) 1 << bit % integerPartWidth)) != 0;
2345 }
2346
2347 /* Set the given bit of a bignum.  */
2348 void
2349 APInt::tcSetBit(integerPart *parts, unsigned int bit)
2350 {
2351   parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2352 }
2353
2354 /* Returns the bit number of the least significant set bit of a
2355    number.  If the input number has no bits set -1U is returned.  */
2356 unsigned int
2357 APInt::tcLSB(const integerPart *parts, unsigned int n)
2358 {
2359   unsigned int i, lsb;
2360
2361   for(i = 0; i < n; i++) {
2362       if (parts[i] != 0) {
2363           lsb = partLSB(parts[i]);
2364
2365           return lsb + i * integerPartWidth;
2366       }
2367   }
2368
2369   return -1U;
2370 }
2371
2372 /* Returns the bit number of the most significant set bit of a number.
2373    If the input number has no bits set -1U is returned.  */
2374 unsigned int
2375 APInt::tcMSB(const integerPart *parts, unsigned int n)
2376 {
2377   unsigned int msb;
2378
2379   do {
2380       --n;
2381
2382       if (parts[n] != 0) {
2383           msb = partMSB(parts[n]);
2384
2385           return msb + n * integerPartWidth;
2386       }
2387   } while (n);
2388
2389   return -1U;
2390 }
2391
2392 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2393    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2394    the least significant bit of DST.  All high bits above srcBITS in
2395    DST are zero-filled.  */
2396 void
2397 APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2398                  unsigned int srcBits, unsigned int srcLSB)
2399 {
2400   unsigned int firstSrcPart, dstParts, shift, n;
2401
2402   dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2403   assert (dstParts <= dstCount);
2404
2405   firstSrcPart = srcLSB / integerPartWidth;
2406   tcAssign (dst, src + firstSrcPart, dstParts);
2407
2408   shift = srcLSB % integerPartWidth;
2409   tcShiftRight (dst, dstParts, shift);
2410
2411   /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2412      in DST.  If this is less that srcBits, append the rest, else
2413      clear the high bits.  */
2414   n = dstParts * integerPartWidth - shift;
2415   if (n < srcBits) {
2416     integerPart mask = lowBitMask (srcBits - n);
2417     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2418                           << n % integerPartWidth);
2419   } else if (n > srcBits) {
2420     if (srcBits % integerPartWidth)
2421       dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2422   }
2423
2424   /* Clear high parts.  */
2425   while (dstParts < dstCount)
2426     dst[dstParts++] = 0;
2427 }
2428
2429 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2430 integerPart
2431 APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2432              integerPart c, unsigned int parts)
2433 {
2434   unsigned int i;
2435
2436   assert(c <= 1);
2437
2438   for(i = 0; i < parts; i++) {
2439     integerPart l;
2440
2441     l = dst[i];
2442     if (c) {
2443       dst[i] += rhs[i] + 1;
2444       c = (dst[i] <= l);
2445     } else {
2446       dst[i] += rhs[i];
2447       c = (dst[i] < l);
2448     }
2449   }
2450
2451   return c;
2452 }
2453
2454 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2455 integerPart
2456 APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2457                   integerPart c, unsigned int parts)
2458 {
2459   unsigned int i;
2460
2461   assert(c <= 1);
2462
2463   for(i = 0; i < parts; i++) {
2464     integerPart l;
2465
2466     l = dst[i];
2467     if (c) {
2468       dst[i] -= rhs[i] + 1;
2469       c = (dst[i] >= l);
2470     } else {
2471       dst[i] -= rhs[i];
2472       c = (dst[i] > l);
2473     }
2474   }
2475
2476   return c;
2477 }
2478
2479 /* Negate a bignum in-place.  */
2480 void
2481 APInt::tcNegate(integerPart *dst, unsigned int parts)
2482 {
2483   tcComplement(dst, parts);
2484   tcIncrement(dst, parts);
2485 }
2486
2487 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2488     DST  = SRC * MULTIPLIER + CARRY   if add is false
2489
2490     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2491     they must start at the same point, i.e. DST == SRC.
2492
2493     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2494     returned.  Otherwise DST is filled with the least significant
2495     DSTPARTS parts of the result, and if all of the omitted higher
2496     parts were zero return zero, otherwise overflow occurred and
2497     return one.  */
2498 int
2499 APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2500                       integerPart multiplier, integerPart carry,
2501                       unsigned int srcParts, unsigned int dstParts,
2502                       bool add)
2503 {
2504   unsigned int i, n;
2505
2506   /* Otherwise our writes of DST kill our later reads of SRC.  */
2507   assert(dst <= src || dst >= src + srcParts);
2508   assert(dstParts <= srcParts + 1);
2509
2510   /* N loops; minimum of dstParts and srcParts.  */
2511   n = dstParts < srcParts ? dstParts: srcParts;
2512
2513   for(i = 0; i < n; i++) {
2514     integerPart low, mid, high, srcPart;
2515
2516       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2517
2518          This cannot overflow, because
2519
2520          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2521
2522          which is less than n^2.  */
2523
2524     srcPart = src[i];
2525
2526     if (multiplier == 0 || srcPart == 0)        {
2527       low = carry;
2528       high = 0;
2529     } else {
2530       low = lowHalf(srcPart) * lowHalf(multiplier);
2531       high = highHalf(srcPart) * highHalf(multiplier);
2532
2533       mid = lowHalf(srcPart) * highHalf(multiplier);
2534       high += highHalf(mid);
2535       mid <<= integerPartWidth / 2;
2536       if (low + mid < low)
2537         high++;
2538       low += mid;
2539
2540       mid = highHalf(srcPart) * lowHalf(multiplier);
2541       high += highHalf(mid);
2542       mid <<= integerPartWidth / 2;
2543       if (low + mid < low)
2544         high++;
2545       low += mid;
2546
2547       /* Now add carry.  */
2548       if (low + carry < low)
2549         high++;
2550       low += carry;
2551     }
2552
2553     if (add) {
2554       /* And now DST[i], and store the new low part there.  */
2555       if (low + dst[i] < low)
2556         high++;
2557       dst[i] += low;
2558     } else
2559       dst[i] = low;
2560
2561     carry = high;
2562   }
2563
2564   if (i < dstParts) {
2565     /* Full multiplication, there is no overflow.  */
2566     assert(i + 1 == dstParts);
2567     dst[i] = carry;
2568     return 0;
2569   } else {
2570     /* We overflowed if there is carry.  */
2571     if (carry)
2572       return 1;
2573
2574     /* We would overflow if any significant unwritten parts would be
2575        non-zero.  This is true if any remaining src parts are non-zero
2576        and the multiplier is non-zero.  */
2577     if (multiplier)
2578       for(; i < srcParts; i++)
2579         if (src[i])
2580           return 1;
2581
2582     /* We fitted in the narrow destination.  */
2583     return 0;
2584   }
2585 }
2586
2587 /* DST = LHS * RHS, where DST has the same width as the operands and
2588    is filled with the least significant parts of the result.  Returns
2589    one if overflow occurred, otherwise zero.  DST must be disjoint
2590    from both operands.  */
2591 int
2592 APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2593                   const integerPart *rhs, unsigned int parts)
2594 {
2595   unsigned int i;
2596   int overflow;
2597
2598   assert(dst != lhs && dst != rhs);
2599
2600   overflow = 0;
2601   tcSet(dst, 0, parts);
2602
2603   for(i = 0; i < parts; i++)
2604     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2605                                parts - i, true);
2606
2607   return overflow;
2608 }
2609
2610 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2611    operands.  No overflow occurs.  DST must be disjoint from both
2612    operands.  Returns the number of parts required to hold the
2613    result.  */
2614 unsigned int
2615 APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2616                       const integerPart *rhs, unsigned int lhsParts,
2617                       unsigned int rhsParts)
2618 {
2619   /* Put the narrower number on the LHS for less loops below.  */
2620   if (lhsParts > rhsParts) {
2621     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2622   } else {
2623     unsigned int n;
2624
2625     assert(dst != lhs && dst != rhs);
2626
2627     tcSet(dst, 0, rhsParts);
2628
2629     for(n = 0; n < lhsParts; n++)
2630       tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2631
2632     n = lhsParts + rhsParts;
2633
2634     return n - (dst[n - 1] == 0);
2635   }
2636 }
2637
2638 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2639    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2640    set REMAINDER to the remainder, return zero.  i.e.
2641
2642    OLD_LHS = RHS * LHS + REMAINDER
2643
2644    SCRATCH is a bignum of the same size as the operands and result for
2645    use by the routine; its contents need not be initialized and are
2646    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2647 */
2648 int
2649 APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2650                 integerPart *remainder, integerPart *srhs,
2651                 unsigned int parts)
2652 {
2653   unsigned int n, shiftCount;
2654   integerPart mask;
2655
2656   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2657
2658   shiftCount = tcMSB(rhs, parts) + 1;
2659   if (shiftCount == 0)
2660     return true;
2661
2662   shiftCount = parts * integerPartWidth - shiftCount;
2663   n = shiftCount / integerPartWidth;
2664   mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2665
2666   tcAssign(srhs, rhs, parts);
2667   tcShiftLeft(srhs, parts, shiftCount);
2668   tcAssign(remainder, lhs, parts);
2669   tcSet(lhs, 0, parts);
2670
2671   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2672      the total.  */
2673   for(;;) {
2674       int compare;
2675
2676       compare = tcCompare(remainder, srhs, parts);
2677       if (compare >= 0) {
2678         tcSubtract(remainder, srhs, 0, parts);
2679         lhs[n] |= mask;
2680       }
2681
2682       if (shiftCount == 0)
2683         break;
2684       shiftCount--;
2685       tcShiftRight(srhs, parts, 1);
2686       if ((mask >>= 1) == 0)
2687         mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2688   }
2689
2690   return false;
2691 }
2692
2693 /* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2694    There are no restrictions on COUNT.  */
2695 void
2696 APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2697 {
2698   if (count) {
2699     unsigned int jump, shift;
2700
2701     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2702     jump = count / integerPartWidth;
2703     shift = count % integerPartWidth;
2704
2705     while (parts > jump) {
2706       integerPart part;
2707
2708       parts--;
2709
2710       /* dst[i] comes from the two parts src[i - jump] and, if we have
2711          an intra-part shift, src[i - jump - 1].  */
2712       part = dst[parts - jump];
2713       if (shift) {
2714         part <<= shift;
2715         if (parts >= jump + 1)
2716           part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2717       }
2718
2719       dst[parts] = part;
2720     }
2721
2722     while (parts > 0)
2723       dst[--parts] = 0;
2724   }
2725 }
2726
2727 /* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2728    zero.  There are no restrictions on COUNT.  */
2729 void
2730 APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2731 {
2732   if (count) {
2733     unsigned int i, jump, shift;
2734
2735     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2736     jump = count / integerPartWidth;
2737     shift = count % integerPartWidth;
2738
2739     /* Perform the shift.  This leaves the most significant COUNT bits
2740        of the result at zero.  */
2741     for(i = 0; i < parts; i++) {
2742       integerPart part;
2743
2744       if (i + jump >= parts) {
2745         part = 0;
2746       } else {
2747         part = dst[i + jump];
2748         if (shift) {
2749           part >>= shift;
2750           if (i + jump + 1 < parts)
2751             part |= dst[i + jump + 1] << (integerPartWidth - shift);
2752         }
2753       }
2754
2755       dst[i] = part;
2756     }
2757   }
2758 }
2759
2760 /* Bitwise and of two bignums.  */
2761 void
2762 APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2763 {
2764   unsigned int i;
2765
2766   for(i = 0; i < parts; i++)
2767     dst[i] &= rhs[i];
2768 }
2769
2770 /* Bitwise inclusive or of two bignums.  */
2771 void
2772 APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2773 {
2774   unsigned int i;
2775
2776   for(i = 0; i < parts; i++)
2777     dst[i] |= rhs[i];
2778 }
2779
2780 /* Bitwise exclusive or of two bignums.  */
2781 void
2782 APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2783 {
2784   unsigned int i;
2785
2786   for(i = 0; i < parts; i++)
2787     dst[i] ^= rhs[i];
2788 }
2789
2790 /* Complement a bignum in-place.  */
2791 void
2792 APInt::tcComplement(integerPart *dst, unsigned int parts)
2793 {
2794   unsigned int i;
2795
2796   for(i = 0; i < parts; i++)
2797     dst[i] = ~dst[i];
2798 }
2799
2800 /* Comparison (unsigned) of two bignums.  */
2801 int
2802 APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2803                  unsigned int parts)
2804 {
2805   while (parts) {
2806       parts--;
2807       if (lhs[parts] == rhs[parts])
2808         continue;
2809
2810       if (lhs[parts] > rhs[parts])
2811         return 1;
2812       else
2813         return -1;
2814     }
2815
2816   return 0;
2817 }
2818
2819 /* Increment a bignum in-place, return the carry flag.  */
2820 integerPart
2821 APInt::tcIncrement(integerPart *dst, unsigned int parts)
2822 {
2823   unsigned int i;
2824
2825   for(i = 0; i < parts; i++)
2826     if (++dst[i] != 0)
2827       break;
2828
2829   return i == parts;
2830 }
2831
2832 /* Set the least significant BITS bits of a bignum, clear the
2833    rest.  */
2834 void
2835 APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2836                                  unsigned int bits)
2837 {
2838   unsigned int i;
2839
2840   i = 0;
2841   while (bits > integerPartWidth) {
2842     dst[i++] = ~(integerPart) 0;
2843     bits -= integerPartWidth;
2844   }
2845
2846   if (bits)
2847     dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2848
2849   while (i < parts)
2850     dst[i++] = 0;
2851 }