lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "apint"
  16 #include "llvm/ADT/APInt.h"
  17 #include "llvm/ADT/FoldingSet.h"
  18 #include "llvm/Support/Debug.h"
  19 #include "llvm/Support/MathExtras.h"
  20 #include <math.h>
  21 #include <limits>
  22 #include <cstring>
  23 #include <cstdlib>
  24 #include <iomanip>
  25
  26 using namespace llvm;
  27
  28 /// This enumeration just provides for internal constants used in this
  29 /// translation unit.
  30 enum {
  31   MIN_INT_BITS = 1,        ///< Minimum number of bits that can be specified
  32   ///< Note that this must remain synchronized with IntegerType::MIN_INT_BITS
  33   MAX_INT_BITS = (1<<23)-1 ///< Maximum number of bits that can be specified
  34   ///< Note that this must remain synchronized with IntegerType::MAX_INT_BITS
  35 };
  36
  37 /// A utility function for allocating memory, checking for allocation failures,
  38 /// and ensuring the contents are zeroed.
  39 inline static uint64_t* getClearedMemory(uint32_t numWords) {
  40   uint64_t * result = new uint64_t[numWords];
  41   assert(result && "APInt memory allocation fails!");
  42   memset(result, 0, numWords * sizeof(uint64_t));
  43   return result;
  44 }
  45
  46 /// A utility function for allocating memory and checking for allocation
  47 /// failure.  The content is not zeroed.
  48 inline static uint64_t* getMemory(uint32_t numWords) {
  49   uint64_t * result = new uint64_t[numWords];
  50   assert(result && "APInt memory allocation fails!");
  51   return result;
  52 }
  53
  54 APInt::APInt(uint32_t numBits, uint64_t val, bool isSigned)
  55   : BitWidth(numBits), VAL(0) {
  56   assert(BitWidth >= MIN_INT_BITS && "bitwidth too small");
  57   assert(BitWidth <= MAX_INT_BITS && "bitwidth too large");
  58   if (isSingleWord())
  59     VAL = val;
  60   else {
  61     pVal = getClearedMemory(getNumWords());
  62     pVal[0] = val;
  63     if (isSigned && int64_t(val) < 0)
  64       for (unsigned i = 1; i < getNumWords(); ++i)
  65         pVal[i] = -1ULL;
  66   }
  67   clearUnusedBits();
  68 }
  69
  70 APInt::APInt(uint32_t numBits, uint32_t numWords, const uint64_t bigVal[])
  71   : BitWidth(numBits), VAL(0)  {
  72   assert(BitWidth >= MIN_INT_BITS && "bitwidth too small");
  73   assert(BitWidth <= MAX_INT_BITS && "bitwidth too large");
  74   assert(bigVal && "Null pointer detected!");
  75   if (isSingleWord())
  76     VAL = bigVal[0];
  77   else {
  78     // Get memory, cleared to 0
  79     pVal = getClearedMemory(getNumWords());
  80     // Calculate the number of words to copy
  81     uint32_t words = std::min<uint32_t>(numWords, getNumWords());
  82     // Copy the words from bigVal to pVal
  83     memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
  84   }
  85   // Make sure unused high bits are cleared
  86   clearUnusedBits();
  87 }
  88
  89 APInt::APInt(uint32_t numbits, const char StrStart[], uint32_t slen,
  90              uint8_t radix)
  91   : BitWidth(numbits), VAL(0) {
  92   assert(BitWidth >= MIN_INT_BITS && "bitwidth too small");
  93   assert(BitWidth <= MAX_INT_BITS && "bitwidth too large");
  94   fromString(numbits, StrStart, slen, radix);
  95 }
  96
  97 APInt::APInt(const APInt& that)
  98   : BitWidth(that.BitWidth), VAL(0) {
  99   assert(BitWidth >= MIN_INT_BITS && "bitwidth too small");
 100   assert(BitWidth <= MAX_INT_BITS && "bitwidth too large");
 101   if (isSingleWord())
 102     VAL = that.VAL;
 103   else {
 104     pVal = getMemory(getNumWords());
 105     memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
 106   }
 107 }
 108
 109 APInt::~APInt() {
 110   if (!isSingleWord() && pVal)
 111     delete [] pVal;
 112 }
 113
 114 APInt& APInt::operator=(const APInt& RHS) {
 115   // Don't do anything for X = X
 116   if (this == &RHS)
 117     return *this;
 118
 119   // If the bitwidths are the same, we can avoid mucking with memory
 120   if (BitWidth == RHS.getBitWidth()) {
 121     if (isSingleWord())
 122       VAL = RHS.VAL;
 123     else
 124       memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
 125     return *this;
 126   }
 127
 128   if (isSingleWord())
 129     if (RHS.isSingleWord())
 130       VAL = RHS.VAL;
 131     else {
 132       VAL = 0;
 133       pVal = getMemory(RHS.getNumWords());
 134       memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 135     }
 136   else if (getNumWords() == RHS.getNumWords())
 137     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 138   else if (RHS.isSingleWord()) {
 139     delete [] pVal;
 140     VAL = RHS.VAL;
 141   } else {
 142     delete [] pVal;
 143     pVal = getMemory(RHS.getNumWords());
 144     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 145   }
 146   BitWidth = RHS.BitWidth;
 147   return clearUnusedBits();
 148 }
 149
 150 APInt& APInt::operator=(uint64_t RHS) {
 151   if (isSingleWord())
 152     VAL = RHS;
 153   else {
 154     pVal[0] = RHS;
 155     memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
 156   }
 157   return clearUnusedBits();
 158 }
 159
 160 /// Profile - This method 'profiles' an APInt for use with FoldingSet.
 161 void APInt::Profile(FoldingSetNodeID& ID) const {
 162   ID.AddInteger(BitWidth);
 163
 164   if (isSingleWord()) {
 165     ID.AddInteger(VAL);
 166     return;
 167   }
 168
 169   uint32_t NumWords = getNumWords();
 170   for (unsigned i = 0; i < NumWords; ++i)
 171     ID.AddInteger(pVal[i]);
 172 }
 173
 174 /// add_1 - This function adds a single "digit" integer, y, to the multiple
 175 /// "digit" integer array,  x[]. x[] is modified to reflect the addition and
 176 /// 1 is returned if there is a carry out, otherwise 0 is returned.
 177 /// @returns the carry of the addition.
 178 static bool add_1(uint64_t dest[], uint64_t x[], uint32_t len, uint64_t y) {
 179   for (uint32_t i = 0; i < len; ++i) {
 180     dest[i] = y + x[i];
 181     if (dest[i] < y)
 182       y = 1; // Carry one to next digit.
 183     else {
 184       y = 0; // No need to carry so exit early
 185       break;
 186     }
 187   }
 188   return y;
 189 }
 190
 191 /// @brief Prefix increment operator. Increments the APInt by one.
 192 APInt& APInt::operator++() {
 193   if (isSingleWord())
 194     ++VAL;
 195   else
 196     add_1(pVal, pVal, getNumWords(), 1);
 197   return clearUnusedBits();
 198 }
 199
 200 /// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
 201 /// the multi-digit integer array, x[], propagating the borrowed 1 value until
 202 /// no further borrowing is neeeded or it runs out of "digits" in x.  The result
 203 /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
 204 /// In other words, if y > x then this function returns 1, otherwise 0.
 205 /// @returns the borrow out of the subtraction
 206 static bool sub_1(uint64_t x[], uint32_t len, uint64_t y) {
 207   for (uint32_t i = 0; i < len; ++i) {
 208     uint64_t X = x[i];
 209     x[i] -= y;
 210     if (y > X)
 211       y = 1;  // We have to "borrow 1" from next "digit"
 212     else {
 213       y = 0;  // No need to borrow
 214       break;  // Remaining digits are unchanged so exit early
 215     }
 216   }
 217   return bool(y);
 218 }
 219
 220 /// @brief Prefix decrement operator. Decrements the APInt by one.
 221 APInt& APInt::operator--() {
 222   if (isSingleWord())
 223     --VAL;
 224   else
 225     sub_1(pVal, getNumWords(), 1);
 226   return clearUnusedBits();
 227 }
 228
 229 /// add - This function adds the integer array x to the integer array Y and
 230 /// places the result in dest.
 231 /// @returns the carry out from the addition
 232 /// @brief General addition of 64-bit integer arrays
 233 static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 234                 uint32_t len) {
 235   bool carry = false;
 236   for (uint32_t i = 0; i< len; ++i) {
 237     uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
 238     dest[i] = x[i] + y[i] + carry;
 239     carry = dest[i] < limit || (carry && dest[i] == limit);
 240   }
 241   return carry;
 242 }
 243
 244 /// Adds the RHS APint to this APInt.
 245 /// @returns this, after addition of RHS.
 246 /// @brief Addition assignment operator.
 247 APInt& APInt::operator+=(const APInt& RHS) {
 248   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 249   if (isSingleWord())
 250     VAL += RHS.VAL;
 251   else {
 252     add(pVal, pVal, RHS.pVal, getNumWords());
 253   }
 254   return clearUnusedBits();
 255 }
 256
 257 /// Subtracts the integer array y from the integer array x
 258 /// @returns returns the borrow out.
 259 /// @brief Generalized subtraction of 64-bit integer arrays.
 260 static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 261                 uint32_t len) {
 262   bool borrow = false;
 263   for (uint32_t i = 0; i < len; ++i) {
 264     uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
 265     borrow = y[i] > x_tmp || (borrow && x[i] == 0);
 266     dest[i] = x_tmp - y[i];
 267   }
 268   return borrow;
 269 }
 270
 271 /// Subtracts the RHS APInt from this APInt
 272 /// @returns this, after subtraction
 273 /// @brief Subtraction assignment operator.
 274 APInt& APInt::operator-=(const APInt& RHS) {
 275   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 276   if (isSingleWord())
 277     VAL -= RHS.VAL;
 278   else
 279     sub(pVal, pVal, RHS.pVal, getNumWords());
 280   return clearUnusedBits();
 281 }
 282
 283 /// Multiplies an integer array, x by a a uint64_t integer and places the result
 284 /// into dest.
 285 /// @returns the carry out of the multiplication.
 286 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
 287 static uint64_t mul_1(uint64_t dest[], uint64_t x[], uint32_t len, uint64_t y) {
 288   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
 289   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
 290   uint64_t carry = 0;
 291
 292   // For each digit of x.
 293   for (uint32_t i = 0; i < len; ++i) {
 294     // Split x into high and low words
 295     uint64_t lx = x[i] & 0xffffffffULL;
 296     uint64_t hx = x[i] >> 32;
 297     // hasCarry - A flag to indicate if there is a carry to the next digit.
 298     // hasCarry == 0, no carry
 299     // hasCarry == 1, has carry
 300     // hasCarry == 2, no carry and the calculation result == 0.
 301     uint8_t hasCarry = 0;
 302     dest[i] = carry + lx * ly;
 303     // Determine if the add above introduces carry.
 304     hasCarry = (dest[i] < carry) ? 1 : 0;
 305     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
 306     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
 307     // (2^32 - 1) + 2^32 = 2^64.
 308     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 309
 310     carry += (lx * hy) & 0xffffffffULL;
 311     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
 312     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
 313             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
 314   }
 315   return carry;
 316 }
 317
 318 /// Multiplies integer array x by integer array y and stores the result into
 319 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
 320 /// @brief Generalized multiplicate of integer arrays.
 321 static void mul(uint64_t dest[], uint64_t x[], uint32_t xlen, uint64_t y[],
 322                 uint32_t ylen) {
 323   dest[xlen] = mul_1(dest, x, xlen, y[0]);
 324   for (uint32_t i = 1; i < ylen; ++i) {
 325     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
 326     uint64_t carry = 0, lx = 0, hx = 0;
 327     for (uint32_t j = 0; j < xlen; ++j) {
 328       lx = x[j] & 0xffffffffULL;
 329       hx = x[j] >> 32;
 330       // hasCarry - A flag to indicate if has carry.
 331       // hasCarry == 0, no carry
 332       // hasCarry == 1, has carry
 333       // hasCarry == 2, no carry and the calculation result == 0.
 334       uint8_t hasCarry = 0;
 335       uint64_t resul = carry + lx * ly;
 336       hasCarry = (resul < carry) ? 1 : 0;
 337       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
 338       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 339
 340       carry += (lx * hy) & 0xffffffffULL;
 341       resul = (carry << 32) | (resul & 0xffffffffULL);
 342       dest[i+j] += resul;
 343       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
 344               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
 345               ((lx * hy) >> 32) + hx * hy;
 346     }
 347     dest[i+xlen] = carry;
 348   }
 349 }
 350
 351 APInt& APInt::operator*=(const APInt& RHS) {
 352   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 353   if (isSingleWord()) {
 354     VAL *= RHS.VAL;
 355     clearUnusedBits();
 356     return *this;
 357   }
 358
 359   // Get some bit facts about LHS and check for zero
 360   uint32_t lhsBits = getActiveBits();
 361   uint32_t lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
 362   if (!lhsWords)
 363     // 0 * X ===> 0
 364     return *this;
 365
 366   // Get some bit facts about RHS and check for zero
 367   uint32_t rhsBits = RHS.getActiveBits();
 368   uint32_t rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
 369   if (!rhsWords) {
 370     // X * 0 ===> 0
 371     clear();
 372     return *this;
 373   }
 374
 375   // Allocate space for the result
 376   uint32_t destWords = rhsWords + lhsWords;
 377   uint64_t *dest = getMemory(destWords);
 378
 379   // Perform the long multiply
 380   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
 381
 382   // Copy result back into *this
 383   clear();
 384   uint32_t wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
 385   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
 386
 387   // delete dest array and return
 388   delete[] dest;
 389   return *this;
 390 }
 391
 392 APInt& APInt::operator&=(const APInt& RHS) {
 393   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 394   if (isSingleWord()) {
 395     VAL &= RHS.VAL;
 396     return *this;
 397   }
 398   uint32_t numWords = getNumWords();
 399   for (uint32_t i = 0; i < numWords; ++i)
 400     pVal[i] &= RHS.pVal[i];
 401   return *this;
 402 }
 403
 404 APInt& APInt::operator|=(const APInt& RHS) {
 405   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 406   if (isSingleWord()) {
 407     VAL |= RHS.VAL;
 408     return *this;
 409   }
 410   uint32_t numWords = getNumWords();
 411   for (uint32_t i = 0; i < numWords; ++i)
 412     pVal[i] |= RHS.pVal[i];
 413   return *this;
 414 }
 415
 416 APInt& APInt::operator^=(const APInt& RHS) {
 417   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 418   if (isSingleWord()) {
 419     VAL ^= RHS.VAL;
 420     this->clearUnusedBits();
 421     return *this;
 422   }
 423   uint32_t numWords = getNumWords();
 424   for (uint32_t i = 0; i < numWords; ++i)
 425     pVal[i] ^= RHS.pVal[i];
 426   return clearUnusedBits();
 427 }
 428
 429 APInt APInt::operator&(const APInt& RHS) const {
 430   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 431   if (isSingleWord())
 432     return APInt(getBitWidth(), VAL & RHS.VAL);
 433
 434   uint32_t numWords = getNumWords();
 435   uint64_t* val = getMemory(numWords);
 436   for (uint32_t i = 0; i < numWords; ++i)
 437     val[i] = pVal[i] & RHS.pVal[i];
 438   return APInt(val, getBitWidth());
 439 }
 440
 441 APInt APInt::operator|(const APInt& RHS) const {
 442   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 443   if (isSingleWord())
 444     return APInt(getBitWidth(), VAL | RHS.VAL);
 445
 446   uint32_t numWords = getNumWords();
 447   uint64_t *val = getMemory(numWords);
 448   for (uint32_t i = 0; i < numWords; ++i)
 449     val[i] = pVal[i] | RHS.pVal[i];
 450   return APInt(val, getBitWidth());
 451 }
 452
 453 APInt APInt::operator^(const APInt& RHS) const {
 454   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 455   if (isSingleWord())
 456     return APInt(BitWidth, VAL ^ RHS.VAL);
 457
 458   uint32_t numWords = getNumWords();
 459   uint64_t *val = getMemory(numWords);
 460   for (uint32_t i = 0; i < numWords; ++i)
 461     val[i] = pVal[i] ^ RHS.pVal[i];
 462
 463   // 0^0==1 so clear the high bits in case they got set.
 464   return APInt(val, getBitWidth()).clearUnusedBits();
 465 }
 466
 467 bool APInt::operator !() const {
 468   if (isSingleWord())
 469     return !VAL;
 470
 471   for (uint32_t i = 0; i < getNumWords(); ++i)
 472     if (pVal[i])
 473       return false;
 474   return true;
 475 }
 476
 477 APInt APInt::operator*(const APInt& RHS) const {
 478   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 479   if (isSingleWord())
 480     return APInt(BitWidth, VAL * RHS.VAL);
 481   APInt Result(*this);
 482   Result *= RHS;
 483   return Result.clearUnusedBits();
 484 }
 485
 486 APInt APInt::operator+(const APInt& RHS) const {
 487   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 488   if (isSingleWord())
 489     return APInt(BitWidth, VAL + RHS.VAL);
 490   APInt Result(BitWidth, 0);
 491   add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 492   return Result.clearUnusedBits();
 493 }
 494
 495 APInt APInt::operator-(const APInt& RHS) const {
 496   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 497   if (isSingleWord())
 498     return APInt(BitWidth, VAL - RHS.VAL);
 499   APInt Result(BitWidth, 0);
 500   sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 501   return Result.clearUnusedBits();
 502 }
 503
 504 bool APInt::operator[](uint32_t bitPosition) const {
 505   return (maskBit(bitPosition) &
 506           (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
 507 }
 508
 509 bool APInt::operator==(const APInt& RHS) const {
 510   assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
 511   if (isSingleWord())
 512     return VAL == RHS.VAL;
 513
 514   // Get some facts about the number of bits used in the two operands.
 515   uint32_t n1 = getActiveBits();
 516   uint32_t n2 = RHS.getActiveBits();
 517
 518   // If the number of bits isn't the same, they aren't equal
 519   if (n1 != n2)
 520     return false;
 521
 522   // If the number of bits fits in a word, we only need to compare the low word.
 523   if (n1 <= APINT_BITS_PER_WORD)
 524     return pVal[0] == RHS.pVal[0];
 525
 526   // Otherwise, compare everything
 527   for (int i = whichWord(n1 - 1); i >= 0; --i)
 528     if (pVal[i] != RHS.pVal[i])
 529       return false;
 530   return true;
 531 }
 532
 533 bool APInt::operator==(uint64_t Val) const {
 534   if (isSingleWord())
 535     return VAL == Val;
 536
 537   uint32_t n = getActiveBits();
 538   if (n <= APINT_BITS_PER_WORD)
 539     return pVal[0] == Val;
 540   else
 541     return false;
 542 }
 543
 544 bool APInt::ult(const APInt& RHS) const {
 545   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 546   if (isSingleWord())
 547     return VAL < RHS.VAL;
 548
 549   // Get active bit length of both operands
 550   uint32_t n1 = getActiveBits();
 551   uint32_t n2 = RHS.getActiveBits();
 552
 553   // If magnitude of LHS is less than RHS, return true.
 554   if (n1 < n2)
 555     return true;
 556
 557   // If magnitude of RHS is greather than LHS, return false.
 558   if (n2 < n1)
 559     return false;
 560
 561   // If they bot fit in a word, just compare the low order word
 562   if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
 563     return pVal[0] < RHS.pVal[0];
 564
 565   // Otherwise, compare all words
 566   uint32_t topWord = whichWord(std::max(n1,n2)-1);
 567   for (int i = topWord; i >= 0; --i) {
 568     if (pVal[i] > RHS.pVal[i])
 569       return false;
 570     if (pVal[i] < RHS.pVal[i])
 571       return true;
 572   }
 573   return false;
 574 }
 575
 576 bool APInt::slt(const APInt& RHS) const {
 577   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 578   if (isSingleWord()) {
 579     int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
 580     int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
 581     return lhsSext < rhsSext;
 582   }
 583
 584   APInt lhs(*this);
 585   APInt rhs(RHS);
 586   bool lhsNeg = isNegative();
 587   bool rhsNeg = rhs.isNegative();
 588   if (lhsNeg) {
 589     // Sign bit is set so perform two's complement to make it positive
 590     lhs.flip();
 591     lhs++;
 592   }
 593   if (rhsNeg) {
 594     // Sign bit is set so perform two's complement to make it positive
 595     rhs.flip();
 596     rhs++;
 597   }
 598
 599   // Now we have unsigned values to compare so do the comparison if necessary
 600   // based on the negativeness of the values.
 601   if (lhsNeg)
 602     if (rhsNeg)
 603       return lhs.ugt(rhs);
 604     else
 605       return true;
 606   else if (rhsNeg)
 607     return false;
 608   else
 609     return lhs.ult(rhs);
 610 }
 611
 612 APInt& APInt::set(uint32_t bitPosition) {
 613   if (isSingleWord())
 614     VAL |= maskBit(bitPosition);
 615   else
 616     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
 617   return *this;
 618 }
 619
 620 APInt& APInt::set() {
 621   if (isSingleWord()) {
 622     VAL = -1ULL;
 623     return clearUnusedBits();
 624   }
 625
 626   // Set all the bits in all the words.
 627   for (uint32_t i = 0; i < getNumWords(); ++i)
 628     pVal[i] = -1ULL;
 629   // Clear the unused ones
 630   return clearUnusedBits();
 631 }
 632
 633 /// Set the given bit to 0 whose position is given as "bitPosition".
 634 /// @brief Set a given bit to 0.
 635 APInt& APInt::clear(uint32_t bitPosition) {
 636   if (isSingleWord())
 637     VAL &= ~maskBit(bitPosition);
 638   else
 639     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
 640   return *this;
 641 }
 642
 643 /// @brief Set every bit to 0.
 644 APInt& APInt::clear() {
 645   if (isSingleWord())
 646     VAL = 0;
 647   else
 648     memset(pVal, 0, getNumWords() * APINT_WORD_SIZE);
 649   return *this;
 650 }
 651
 652 /// @brief Bitwise NOT operator. Performs a bitwise logical NOT operation on
 653 /// this APInt.
 654 APInt APInt::operator~() const {
 655   APInt Result(*this);
 656   Result.flip();
 657   return Result;
 658 }
 659
 660 /// @brief Toggle every bit to its opposite value.
 661 APInt& APInt::flip() {
 662   if (isSingleWord()) {
 663     VAL ^= -1ULL;
 664     return clearUnusedBits();
 665   }
 666   for (uint32_t i = 0; i < getNumWords(); ++i)
 667     pVal[i] ^= -1ULL;
 668   return clearUnusedBits();
 669 }
 670
 671 /// Toggle a given bit to its opposite value whose position is given
 672 /// as "bitPosition".
 673 /// @brief Toggles a given bit to its opposite value.
 674 APInt& APInt::flip(uint32_t bitPosition) {
 675   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 676   if ((*this)[bitPosition]) clear(bitPosition);
 677   else set(bitPosition);
 678   return *this;
 679 }
 680
 681 uint32_t APInt::getBitsNeeded(const char* str, uint32_t slen, uint8_t radix) {
 682   assert(str != 0 && "Invalid value string");
 683   assert(slen > 0 && "Invalid string length");
 684
 685   // Each computation below needs to know if its negative
 686   uint32_t isNegative = str[0] == '-';
 687   if (isNegative) {
 688     slen--;
 689     str++;
 690   }
 691   // For radixes of power-of-two values, the bits required is accurately and
 692   // easily computed
 693   if (radix == 2)
 694     return slen + isNegative;
 695   if (radix == 8)
 696     return slen * 3 + isNegative;
 697   if (radix == 16)
 698     return slen * 4 + isNegative;
 699
 700   // Otherwise it must be radix == 10, the hard case
 701   assert(radix == 10 && "Invalid radix");
 702
 703   // This is grossly inefficient but accurate. We could probably do something
 704   // with a computation of roughly slen*64/20 and then adjust by the value of
 705   // the first few digits. But, I'm not sure how accurate that could be.
 706
 707   // Compute a sufficient number of bits that is always large enough but might
 708   // be too large. This avoids the assertion in the constructor.
 709   uint32_t sufficient = slen*64/18;
 710
 711   // Convert to the actual binary value.
 712   APInt tmp(sufficient, str, slen, radix);
 713
 714   // Compute how many bits are required.
 715   return isNegative + tmp.logBase2() + 1;
 716 }
 717
 718 uint64_t APInt::getHashValue() const {
 719   // Put the bit width into the low order bits.
 720   uint64_t hash = BitWidth;
 721
 722   // Add the sum of the words to the hash.
 723   if (isSingleWord())
 724     hash += VAL << 6; // clear separation of up to 64 bits
 725   else
 726     for (uint32_t i = 0; i < getNumWords(); ++i)
 727       hash += pVal[i] << 6; // clear sepration of up to 64 bits
 728   return hash;
 729 }
 730
 731 /// HiBits - This function returns the high "numBits" bits of this APInt.
 732 APInt APInt::getHiBits(uint32_t numBits) const {
 733   return APIntOps::lshr(*this, BitWidth - numBits);
 734 }
 735
 736 /// LoBits - This function returns the low "numBits" bits of this APInt.
 737 APInt APInt::getLoBits(uint32_t numBits) const {
 738   return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
 739                         BitWidth - numBits);
 740 }
 741
 742 bool APInt::isPowerOf2() const {
 743   return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
 744 }
 745
 746 uint32_t APInt::countLeadingZeros() const {
 747   uint32_t Count = 0;
 748   if (isSingleWord())
 749     Count = CountLeadingZeros_64(VAL);
 750   else {
 751     for (uint32_t i = getNumWords(); i > 0u; --i) {
 752       if (pVal[i-1] == 0)
 753         Count += APINT_BITS_PER_WORD;
 754       else {
 755         Count += CountLeadingZeros_64(pVal[i-1]);
 756         break;
 757       }
 758     }
 759   }
 760   uint32_t remainder = BitWidth % APINT_BITS_PER_WORD;
 761   if (remainder)
 762     Count -= APINT_BITS_PER_WORD - remainder;
 763   return std::min(Count, BitWidth);
 764 }
 765
 766 static uint32_t countLeadingOnes_64(uint64_t V, uint32_t skip) {
 767   uint32_t Count = 0;
 768   if (skip)
 769     V <<= skip;
 770   while (V && (V & (1ULL << 63))) {
 771     Count++;
 772     V <<= 1;
 773   }
 774   return Count;
 775 }
 776
 777 uint32_t APInt::countLeadingOnes() const {
 778   if (isSingleWord())
 779     return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
 780
 781   uint32_t highWordBits = BitWidth % APINT_BITS_PER_WORD;
 782   uint32_t shift = (highWordBits == 0 ? 0 : APINT_BITS_PER_WORD - highWordBits);
 783   int i = getNumWords() - 1;
 784   uint32_t Count = countLeadingOnes_64(pVal[i], shift);
 785   if (Count == highWordBits) {
 786     for (i--; i >= 0; --i) {
 787       if (pVal[i] == -1ULL)
 788         Count += APINT_BITS_PER_WORD;
 789       else {
 790         Count += countLeadingOnes_64(pVal[i], 0);
 791         break;
 792       }
 793     }
 794   }
 795   return Count;
 796 }
 797
 798 uint32_t APInt::countTrailingZeros() const {
 799   if (isSingleWord())
 800     return std::min(uint32_t(CountTrailingZeros_64(VAL)), BitWidth);
 801   uint32_t Count = 0;
 802   uint32_t i = 0;
 803   for (; i < getNumWords() && pVal[i] == 0; ++i)
 804     Count += APINT_BITS_PER_WORD;
 805   if (i < getNumWords())
 806     Count += CountTrailingZeros_64(pVal[i]);
 807   return std::min(Count, BitWidth);
 808 }
 809
 810 uint32_t APInt::countTrailingOnes() const {
 811   if (isSingleWord())
 812     return std::min(uint32_t(CountTrailingOnes_64(VAL)), BitWidth);
 813   uint32_t Count = 0;
 814   uint32_t i = 0;
 815   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
 816     Count += APINT_BITS_PER_WORD;
 817   if (i < getNumWords())
 818     Count += CountTrailingOnes_64(pVal[i]);
 819   return std::min(Count, BitWidth);
 820 }
 821
 822 uint32_t APInt::countPopulation() const {
 823   if (isSingleWord())
 824     return CountPopulation_64(VAL);
 825   uint32_t Count = 0;
 826   for (uint32_t i = 0; i < getNumWords(); ++i)
 827     Count += CountPopulation_64(pVal[i]);
 828   return Count;
 829 }
 830
 831 APInt APInt::byteSwap() const {
 832   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 833   if (BitWidth == 16)
 834     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
 835   else if (BitWidth == 32)
 836     return APInt(BitWidth, ByteSwap_32(uint32_t(VAL)));
 837   else if (BitWidth == 48) {
 838     uint32_t Tmp1 = uint32_t(VAL >> 16);
 839     Tmp1 = ByteSwap_32(Tmp1);
 840     uint16_t Tmp2 = uint16_t(VAL);
 841     Tmp2 = ByteSwap_16(Tmp2);
 842     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 843   } else if (BitWidth == 64)
 844     return APInt(BitWidth, ByteSwap_64(VAL));
 845   else {
 846     APInt Result(BitWidth, 0);
 847     char *pByte = (char*)Result.pVal;
 848     for (uint32_t i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
 849       char Tmp = pByte[i];
 850       pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
 851       pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
 852     }
 853     return Result;
 854   }
 855 }
 856
 857 APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
 858                                             const APInt& API2) {
 859   APInt A = API1, B = API2;
 860   while (!!B) {
 861     APInt T = B;
 862     B = APIntOps::urem(A, B);
 863     A = T;
 864   }
 865   return A;
 866 }
 867
 868 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, uint32_t width) {
 869   union {
 870     double D;
 871     uint64_t I;
 872   } T;
 873   T.D = Double;
 874
 875   // Get the sign bit from the highest order bit
 876   bool isNeg = T.I >> 63;
 877
 878   // Get the 11-bit exponent and adjust for the 1023 bit bias
 879   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
 880
 881   // If the exponent is negative, the value is < 0 so just return 0.
 882   if (exp < 0)
 883     return APInt(width, 0u);
 884
 885   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 886   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
 887
 888   // If the exponent doesn't shift all bits out of the mantissa
 889   if (exp < 52)
 890     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 891                     APInt(width, mantissa >> (52 - exp));
 892
 893   // If the client didn't provide enough bits for us to shift the mantissa into
 894   // then the result is undefined, just return 0
 895   if (width <= exp - 52)
 896     return APInt(width, 0);
 897
 898   // Otherwise, we have to shift the mantissa bits up to the right location
 899   APInt Tmp(width, mantissa);
 900   Tmp = Tmp.shl((uint32_t)exp - 52);
 901   return isNeg ? -Tmp : Tmp;
 902 }
 903
 904 /// RoundToDouble - This function convert this APInt to a double.
 905 /// The layout for double is as following (IEEE Standard 754):
 906 ///  --------------------------------------
 907 /// |  Sign    Exponent    Fraction    Bias |
 908 /// |-------------------------------------- |
 909 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 910 ///  --------------------------------------
 911 double APInt::roundToDouble(bool isSigned) const {
 912
 913   // Handle the simple case where the value is contained in one uint64_t.
 914   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 915     if (isSigned) {
 916       int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
 917       return double(sext);
 918     } else
 919       return double(VAL);
 920   }
 921
 922   // Determine if the value is negative.
 923   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 924
 925   // Construct the absolute value if we're negative.
 926   APInt Tmp(isNeg ? -(*this) : (*this));
 927
 928   // Figure out how many bits we're using.
 929   uint32_t n = Tmp.getActiveBits();
 930
 931   // The exponent (without bias normalization) is just the number of bits
 932   // we are using. Note that the sign bit is gone since we constructed the
 933   // absolute value.
 934   uint64_t exp = n;
 935
 936   // Return infinity for exponent overflow
 937   if (exp > 1023) {
 938     if (!isSigned || !isNeg)
 939       return std::numeric_limits<double>::infinity();
 940     else
 941       return -std::numeric_limits<double>::infinity();
 942   }
 943   exp += 1023; // Increment for 1023 bias
 944
 945   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 946   // extract the high 52 bits from the correct words in pVal.
 947   uint64_t mantissa;
 948   unsigned hiWord = whichWord(n-1);
 949   if (hiWord == 0) {
 950     mantissa = Tmp.pVal[0];
 951     if (n > 52)
 952       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 953   } else {
 954     assert(hiWord > 0 && "huh?");
 955     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 956     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 957     mantissa = hibits | lobits;
 958   }
 959
 960   // The leading bit of mantissa is implicit, so get rid of it.
 961   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 962   union {
 963     double D;
 964     uint64_t I;
 965   } T;
 966   T.I = sign | (exp << 52) | mantissa;
 967   return T.D;
 968 }
 969
 970 // Truncate to new width.
 971 APInt &APInt::trunc(uint32_t width) {
 972   assert(width < BitWidth && "Invalid APInt Truncate request");
 973   assert(width >= MIN_INT_BITS && "Can't truncate to 0 bits");
 974   uint32_t wordsBefore = getNumWords();
 975   BitWidth = width;
 976   uint32_t wordsAfter = getNumWords();
 977   if (wordsBefore != wordsAfter) {
 978     if (wordsAfter == 1) {
 979       uint64_t *tmp = pVal;
 980       VAL = pVal[0];
 981       delete [] tmp;
 982     } else {
 983       uint64_t *newVal = getClearedMemory(wordsAfter);
 984       for (uint32_t i = 0; i < wordsAfter; ++i)
 985         newVal[i] = pVal[i];
 986       delete [] pVal;
 987       pVal = newVal;
 988     }
 989   }
 990   return clearUnusedBits();
 991 }
 992
 993 // Sign extend to a new width.
 994 APInt &APInt::sext(uint32_t width) {
 995   assert(width > BitWidth && "Invalid APInt SignExtend request");
 996   assert(width <= MAX_INT_BITS && "Too many bits");
 997   // If the sign bit isn't set, this is the same as zext.
 998   if (!isNegative()) {
 999     zext(width);
1000     return *this;
1001   }
1002
1003   // The sign bit is set. First, get some facts
1004   uint32_t wordsBefore = getNumWords();
1005   uint32_t wordBits = BitWidth % APINT_BITS_PER_WORD;
1006   BitWidth = width;
1007   uint32_t wordsAfter = getNumWords();
1008
1009   // Mask the high order word appropriately
1010   if (wordsBefore == wordsAfter) {
1011     uint32_t newWordBits = width % APINT_BITS_PER_WORD;
1012     // The extension is contained to the wordsBefore-1th word.
1013     uint64_t mask = ~0ULL;
1014     if (newWordBits)
1015       mask >>= APINT_BITS_PER_WORD - newWordBits;
1016     mask <<= wordBits;
1017     if (wordsBefore == 1)
1018       VAL |= mask;
1019     else
1020       pVal[wordsBefore-1] |= mask;
1021     return clearUnusedBits();
1022   }
1023
1024   uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1025   uint64_t *newVal = getMemory(wordsAfter);
1026   if (wordsBefore == 1)
1027     newVal[0] = VAL | mask;
1028   else {
1029     for (uint32_t i = 0; i < wordsBefore; ++i)
1030       newVal[i] = pVal[i];
1031     newVal[wordsBefore-1] |= mask;
1032   }
1033   for (uint32_t i = wordsBefore; i < wordsAfter; i++)
1034     newVal[i] = -1ULL;
1035   if (wordsBefore != 1)
1036     delete [] pVal;
1037   pVal = newVal;
1038   return clearUnusedBits();
1039 }
1040
1041 //  Zero extend to a new width.
1042 APInt &APInt::zext(uint32_t width) {
1043   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1044   assert(width <= MAX_INT_BITS && "Too many bits");
1045   uint32_t wordsBefore = getNumWords();
1046   BitWidth = width;
1047   uint32_t wordsAfter = getNumWords();
1048   if (wordsBefore != wordsAfter) {
1049     uint64_t *newVal = getClearedMemory(wordsAfter);
1050     if (wordsBefore == 1)
1051       newVal[0] = VAL;
1052     else
1053       for (uint32_t i = 0; i < wordsBefore; ++i)
1054         newVal[i] = pVal[i];
1055     if (wordsBefore != 1)
1056       delete [] pVal;
1057     pVal = newVal;
1058   }
1059   return *this;
1060 }
1061
1062 APInt &APInt::zextOrTrunc(uint32_t width) {
1063   if (BitWidth < width)
1064     return zext(width);
1065   if (BitWidth > width)
1066     return trunc(width);
1067   return *this;
1068 }
1069
1070 APInt &APInt::sextOrTrunc(uint32_t width) {
1071   if (BitWidth < width)
1072     return sext(width);
1073   if (BitWidth > width)
1074     return trunc(width);
1075   return *this;
1076 }
1077
1078 /// Arithmetic right-shift this APInt by shiftAmt.
1079 /// @brief Arithmetic right-shift function.
1080 APInt APInt::ashr(const APInt &shiftAmt) const {
1081   return ashr((uint32_t)shiftAmt.getLimitedValue(BitWidth));
1082 }
1083
1084 /// Arithmetic right-shift this APInt by shiftAmt.
1085 /// @brief Arithmetic right-shift function.
1086 APInt APInt::ashr(uint32_t shiftAmt) const {
1087   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1088   // Handle a degenerate case
1089   if (shiftAmt == 0)
1090     return *this;
1091
1092   // Handle single word shifts with built-in ashr
1093   if (isSingleWord()) {
1094     if (shiftAmt == BitWidth)
1095       return APInt(BitWidth, 0); // undefined
1096     else {
1097       uint32_t SignBit = APINT_BITS_PER_WORD - BitWidth;
1098       return APInt(BitWidth,
1099         (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1100     }
1101   }
1102
1103   // If all the bits were shifted out, the result is, technically, undefined.
1104   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1105   // issues in the algorithm below.
1106   if (shiftAmt == BitWidth) {
1107     if (isNegative())
1108       return APInt(BitWidth, -1ULL, true);
1109     else
1110       return APInt(BitWidth, 0);
1111   }
1112
1113   // Create some space for the result.
1114   uint64_t * val = new uint64_t[getNumWords()];
1115
1116   // Compute some values needed by the following shift algorithms
1117   uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1118   uint32_t offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1119   uint32_t breakWord = getNumWords() - 1 - offset; // last word affected
1120   uint32_t bitsInWord = whichBit(BitWidth); // how many bits in last word?
1121   if (bitsInWord == 0)
1122     bitsInWord = APINT_BITS_PER_WORD;
1123
1124   // If we are shifting whole words, just move whole words
1125   if (wordShift == 0) {
1126     // Move the words containing significant bits
1127     for (uint32_t i = 0; i <= breakWord; ++i)
1128       val[i] = pVal[i+offset]; // move whole word
1129
1130     // Adjust the top significant word for sign bit fill, if negative
1131     if (isNegative())
1132       if (bitsInWord < APINT_BITS_PER_WORD)
1133         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1134   } else {
1135     // Shift the low order words
1136     for (uint32_t i = 0; i < breakWord; ++i) {
1137       // This combines the shifted corresponding word with the low bits from
1138       // the next word (shifted into this word's high bits).
1139       val[i] = (pVal[i+offset] >> wordShift) |
1140                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1141     }
1142
1143     // Shift the break word. In this case there are no bits from the next word
1144     // to include in this word.
1145     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1146
1147     // Deal with sign extenstion in the break word, and possibly the word before
1148     // it.
1149     if (isNegative()) {
1150       if (wordShift > bitsInWord) {
1151         if (breakWord > 0)
1152           val[breakWord-1] |=
1153             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1154         val[breakWord] |= ~0ULL;
1155       } else
1156         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1157     }
1158   }
1159
1160   // Remaining words are 0 or -1, just assign them.
1161   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1162   for (uint32_t i = breakWord+1; i < getNumWords(); ++i)
1163     val[i] = fillValue;
1164   return APInt(val, BitWidth).clearUnusedBits();
1165 }
1166
1167 /// Logical right-shift this APInt by shiftAmt.
1168 /// @brief Logical right-shift function.
1169 APInt APInt::lshr(const APInt &shiftAmt) const {
1170   return lshr((uint32_t)shiftAmt.getLimitedValue(BitWidth));
1171 }
1172
1173 /// Logical right-shift this APInt by shiftAmt.
1174 /// @brief Logical right-shift function.
1175 APInt APInt::lshr(uint32_t shiftAmt) const {
1176   if (isSingleWord()) {
1177     if (shiftAmt == BitWidth)
1178       return APInt(BitWidth, 0);
1179     else
1180       return APInt(BitWidth, this->VAL >> shiftAmt);
1181   }
1182
1183   // If all the bits were shifted out, the result is 0. This avoids issues
1184   // with shifting by the size of the integer type, which produces undefined
1185   // results. We define these "undefined results" to always be 0.
1186   if (shiftAmt == BitWidth)
1187     return APInt(BitWidth, 0);
1188
1189   // If none of the bits are shifted out, the result is *this. This avoids
1190   // issues with shifting byt he size of the integer type, which produces
1191   // undefined results in the code below. This is also an optimization.
1192   if (shiftAmt == 0)
1193     return *this;
1194
1195   // Create some space for the result.
1196   uint64_t * val = new uint64_t[getNumWords()];
1197
1198   // If we are shifting less than a word, compute the shift with a simple carry
1199   if (shiftAmt < APINT_BITS_PER_WORD) {
1200     uint64_t carry = 0;
1201     for (int i = getNumWords()-1; i >= 0; --i) {
1202       val[i] = (pVal[i] >> shiftAmt) | carry;
1203       carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1204     }
1205     return APInt(val, BitWidth).clearUnusedBits();
1206   }
1207
1208   // Compute some values needed by the remaining shift algorithms
1209   uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD;
1210   uint32_t offset = shiftAmt / APINT_BITS_PER_WORD;
1211
1212   // If we are shifting whole words, just move whole words
1213   if (wordShift == 0) {
1214     for (uint32_t i = 0; i < getNumWords() - offset; ++i)
1215       val[i] = pVal[i+offset];
1216     for (uint32_t i = getNumWords()-offset; i < getNumWords(); i++)
1217       val[i] = 0;
1218     return APInt(val,BitWidth).clearUnusedBits();
1219   }
1220
1221   // Shift the low order words
1222   uint32_t breakWord = getNumWords() - offset -1;
1223   for (uint32_t i = 0; i < breakWord; ++i)
1224     val[i] = (pVal[i+offset] >> wordShift) |
1225              (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1226   // Shift the break word.
1227   val[breakWord] = pVal[breakWord+offset] >> wordShift;
1228
1229   // Remaining words are 0
1230   for (uint32_t i = breakWord+1; i < getNumWords(); ++i)
1231     val[i] = 0;
1232   return APInt(val, BitWidth).clearUnusedBits();
1233 }
1234
1235 /// Left-shift this APInt by shiftAmt.
1236 /// @brief Left-shift function.
1237 APInt APInt::shl(const APInt &shiftAmt) const {
1238   // It's undefined behavior in C to shift by BitWidth or greater, but
1239   return shl((uint32_t)shiftAmt.getLimitedValue(BitWidth));
1240 }
1241
1242 /// Left-shift this APInt by shiftAmt.
1243 /// @brief Left-shift function.
1244 APInt APInt::shl(uint32_t shiftAmt) const {
1245   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1246   if (isSingleWord()) {
1247     if (shiftAmt == BitWidth)
1248       return APInt(BitWidth, 0); // avoid undefined shift results
1249     return APInt(BitWidth, VAL << shiftAmt);
1250   }
1251
1252   // If all the bits were shifted out, the result is 0. This avoids issues
1253   // with shifting by the size of the integer type, which produces undefined
1254   // results. We define these "undefined results" to always be 0.
1255   if (shiftAmt == BitWidth)
1256     return APInt(BitWidth, 0);
1257
1258   // If none of the bits are shifted out, the result is *this. This avoids a
1259   // lshr by the words size in the loop below which can produce incorrect
1260   // results. It also avoids the expensive computation below for a common case.
1261   if (shiftAmt == 0)
1262     return *this;
1263
1264   // Create some space for the result.
1265   uint64_t * val = new uint64_t[getNumWords()];
1266
1267   // If we are shifting less than a word, do it the easy way
1268   if (shiftAmt < APINT_BITS_PER_WORD) {
1269     uint64_t carry = 0;
1270     for (uint32_t i = 0; i < getNumWords(); i++) {
1271       val[i] = pVal[i] << shiftAmt | carry;
1272       carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1273     }
1274     return APInt(val, BitWidth).clearUnusedBits();
1275   }
1276
1277   // Compute some values needed by the remaining shift algorithms
1278   uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD;
1279   uint32_t offset = shiftAmt / APINT_BITS_PER_WORD;
1280
1281   // If we are shifting whole words, just move whole words
1282   if (wordShift == 0) {
1283     for (uint32_t i = 0; i < offset; i++)
1284       val[i] = 0;
1285     for (uint32_t i = offset; i < getNumWords(); i++)
1286       val[i] = pVal[i-offset];
1287     return APInt(val,BitWidth).clearUnusedBits();
1288   }
1289
1290   // Copy whole words from this to Result.
1291   uint32_t i = getNumWords() - 1;
1292   for (; i > offset; --i)
1293     val[i] = pVal[i-offset] << wordShift |
1294              pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1295   val[offset] = pVal[0] << wordShift;
1296   for (i = 0; i < offset; ++i)
1297     val[i] = 0;
1298   return APInt(val, BitWidth).clearUnusedBits();
1299 }
1300
1301 APInt APInt::rotl(const APInt &rotateAmt) const {
1302   return rotl((uint32_t)rotateAmt.getLimitedValue(BitWidth));
1303 }
1304
1305 APInt APInt::rotl(uint32_t rotateAmt) const {
1306   if (rotateAmt == 0)
1307     return *this;
1308   // Don't get too fancy, just use existing shift/or facilities
1309   APInt hi(*this);
1310   APInt lo(*this);
1311   hi.shl(rotateAmt);
1312   lo.lshr(BitWidth - rotateAmt);
1313   return hi | lo;
1314 }
1315
1316 APInt APInt::rotr(const APInt &rotateAmt) const {
1317   return rotr((uint32_t)rotateAmt.getLimitedValue(BitWidth));
1318 }
1319
1320 APInt APInt::rotr(uint32_t rotateAmt) const {
1321   if (rotateAmt == 0)
1322     return *this;
1323   // Don't get too fancy, just use existing shift/or facilities
1324   APInt hi(*this);
1325   APInt lo(*this);
1326   lo.lshr(rotateAmt);
1327   hi.shl(BitWidth - rotateAmt);
1328   return hi | lo;
1329 }
1330
1331 // Square Root - this method computes and returns the square root of "this".
1332 // Three mechanisms are used for computation. For small values (<= 5 bits),
1333 // a table lookup is done. This gets some performance for common cases. For
1334 // values using less than 52 bits, the value is converted to double and then
1335 // the libc sqrt function is called. The result is rounded and then converted
1336 // back to a uint64_t which is then used to construct the result. Finally,
1337 // the Babylonian method for computing square roots is used.
1338 APInt APInt::sqrt() const {
1339
1340   // Determine the magnitude of the value.
1341   uint32_t magnitude = getActiveBits();
1342
1343   // Use a fast table for some small values. This also gets rid of some
1344   // rounding errors in libc sqrt for small values.
1345   if (magnitude <= 5) {
1346     static const uint8_t results[32] = {
1347       /*     0 */ 0,
1348       /*  1- 2 */ 1, 1,
1349       /*  3- 6 */ 2, 2, 2, 2,
1350       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1351       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1352       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1353       /*    31 */ 6
1354     };
1355     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1356   }
1357
1358   // If the magnitude of the value fits in less than 52 bits (the precision of
1359   // an IEEE double precision floating point value), then we can use the
1360   // libc sqrt function which will probably use a hardware sqrt computation.
1361   // This should be faster than the algorithm below.
1362   if (magnitude < 52) {
1363 #ifdef _MSC_VER
1364     // Amazingly, VC++ doesn't have round().
1365     return APInt(BitWidth,
1366                  uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1367 #else
1368     return APInt(BitWidth,
1369                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1370 #endif
1371   }
1372
1373   // Okay, all the short cuts are exhausted. We must compute it. The following
1374   // is a classical Babylonian method for computing the square root. This code
1375   // was adapted to APINt from a wikipedia article on such computations.
1376   // See http://www.wikipedia.org/ and go to the page named
1377   // Calculate_an_integer_square_root.
1378   uint32_t nbits = BitWidth, i = 4;
1379   APInt testy(BitWidth, 16);
1380   APInt x_old(BitWidth, 1);
1381   APInt x_new(BitWidth, 0);
1382   APInt two(BitWidth, 2);
1383
1384   // Select a good starting value using binary logarithms.
1385   for (;; i += 2, testy = testy.shl(2))
1386     if (i >= nbits || this->ule(testy)) {
1387       x_old = x_old.shl(i / 2);
1388       break;
1389     }
1390
1391   // Use the Babylonian method to arrive at the integer square root:
1392   for (;;) {
1393     x_new = (this->udiv(x_old) + x_old).udiv(two);
1394     if (x_old.ule(x_new))
1395       break;
1396     x_old = x_new;
1397   }
1398
1399   // Make sure we return the closest approximation
1400   // NOTE: The rounding calculation below is correct. It will produce an
1401   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1402   // determined to be a rounding issue with pari/gp as it begins to use a
1403   // floating point representation after 192 bits. There are no discrepancies
1404   // between this algorithm and pari/gp for bit widths < 192 bits.
1405   APInt square(x_old * x_old);
1406   APInt nextSquare((x_old + 1) * (x_old +1));
1407   if (this->ult(square))
1408     return x_old;
1409   else if (this->ule(nextSquare)) {
1410     APInt midpoint((nextSquare - square).udiv(two));
1411     APInt offset(*this - square);
1412     if (offset.ult(midpoint))
1413       return x_old;
1414     else
1415       return x_old + 1;
1416   } else
1417     assert(0 && "Error in APInt::sqrt computation");
1418   return x_old + 1;
1419 }
1420
1421 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1422 /// iterative extended Euclidean algorithm is used to solve for this value,
1423 /// however we simplify it to speed up calculating only the inverse, and take
1424 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1425 /// (potentially large) APInts around.
1426 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1427   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1428
1429   // Using the properties listed at the following web page (accessed 06/21/08):
1430   //   http://www.numbertheory.org/php/euclid.html
1431   // (especially the properties numbered 3, 4 and 9) it can be proved that
1432   // BitWidth bits suffice for all the computations in the algorithm implemented
1433   // below. More precisely, this number of bits suffice if the multiplicative
1434   // inverse exists, but may not suffice for the general extended Euclidean
1435   // algorithm.
1436
1437   APInt r[2] = { modulo, *this };
1438   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1439   APInt q(BitWidth, 0);
1440
1441   unsigned i;
1442   for (i = 0; r[i^1] != 0; i ^= 1) {
1443     // An overview of the math without the confusing bit-flipping:
1444     // q = r[i-2] / r[i-1]
1445     // r[i] = r[i-2] % r[i-1]
1446     // t[i] = t[i-2] - t[i-1] * q
1447     udivrem(r[i], r[i^1], q, r[i]);
1448     t[i] -= t[i^1] * q;
1449   }
1450
1451   // If this APInt and the modulo are not coprime, there is no multiplicative
1452   // inverse, so return 0. We check this by looking at the next-to-last
1453   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1454   // algorithm.
1455   if (r[i] != 1)
1456     return APInt(BitWidth, 0);
1457
1458   // The next-to-last t is the multiplicative inverse.  However, we are
1459   // interested in a positive inverse. Calcuate a positive one from a negative
1460   // one if necessary. A simple addition of the modulo suffices because
1461   // abs(t[i]) is known to be less than *this/2 (see the link above).
1462   return t[i].isNegative() ? t[i] + modulo : t[i];
1463 }
1464
1465 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1466 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1467 /// variables here have the same names as in the algorithm. Comments explain
1468 /// the algorithm and any deviation from it.
1469 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1470                      uint32_t m, uint32_t n) {
1471   assert(u && "Must provide dividend");
1472   assert(v && "Must provide divisor");
1473   assert(q && "Must provide quotient");
1474   assert(u != v && u != q && v != q && "Must us different memory");
1475   assert(n>1 && "n must be > 1");
1476
1477   // Knuth uses the value b as the base of the number system. In our case b
1478   // is 2^31 so we just set it to -1u.
1479   uint64_t b = uint64_t(1) << 32;
1480
1481   DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n');
1482   DEBUG(cerr << "KnuthDiv: original:");
1483   DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1484   DEBUG(cerr << " by");
1485   DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1486   DEBUG(cerr << '\n');
1487   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1488   // u and v by d. Note that we have taken Knuth's advice here to use a power
1489   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1490   // 2 allows us to shift instead of multiply and it is easy to determine the
1491   // shift amount from the leading zeros.  We are basically normalizing the u
1492   // and v so that its high bits are shifted to the top of v's range without
1493   // overflow. Note that this can require an extra word in u so that u must
1494   // be of length m+n+1.
1495   uint32_t shift = CountLeadingZeros_32(v[n-1]);
1496   uint32_t v_carry = 0;
1497   uint32_t u_carry = 0;
1498   if (shift) {
1499     for (uint32_t i = 0; i < m+n; ++i) {
1500       uint32_t u_tmp = u[i] >> (32 - shift);
1501       u[i] = (u[i] << shift) | u_carry;
1502       u_carry = u_tmp;
1503     }
1504     for (uint32_t i = 0; i < n; ++i) {
1505       uint32_t v_tmp = v[i] >> (32 - shift);
1506       v[i] = (v[i] << shift) | v_carry;
1507       v_carry = v_tmp;
1508     }
1509   }
1510   u[m+n] = u_carry;
1511   DEBUG(cerr << "KnuthDiv:   normal:");
1512   DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1513   DEBUG(cerr << " by");
1514   DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1515   DEBUG(cerr << '\n');
1516
1517   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1518   int j = m;
1519   do {
1520     DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n');
1521     // D3. [Calculate q'.].
1522     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1523     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1524     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1525     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1526     // on v[n-2] determines at high speed most of the cases in which the trial
1527     // value qp is one too large, and it eliminates all cases where qp is two
1528     // too large.
1529     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1530     DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n');
1531     uint64_t qp = dividend / v[n-1];
1532     uint64_t rp = dividend % v[n-1];
1533     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1534       qp--;
1535       rp += v[n-1];
1536       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1537         qp--;
1538     }
1539     DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1540
1541     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1542     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1543     // consists of a simple multiplication by a one-place number, combined with
1544     // a subtraction.
1545     bool isNeg = false;
1546     for (uint32_t i = 0; i < n; ++i) {
1547       uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1548       uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1549       bool borrow = subtrahend > u_tmp;
1550       DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp
1551                  << ", subtrahend == " << subtrahend
1552                  << ", borrow = " << borrow << '\n');
1553
1554       uint64_t result = u_tmp - subtrahend;
1555       uint32_t k = j + i;
1556       u[k++] = (uint32_t)(result & (b-1)); // subtract low word
1557       u[k++] = (uint32_t)(result >> 32);   // subtract high word
1558       while (borrow && k <= m+n) { // deal with borrow to the left
1559         borrow = u[k] == 0;
1560         u[k]--;
1561         k++;
1562       }
1563       isNeg |= borrow;
1564       DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1565                     u[j+i+1] << '\n');
1566     }
1567     DEBUG(cerr << "KnuthDiv: after subtraction:");
1568     DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1569     DEBUG(cerr << '\n');
1570     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1571     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1572     // true value plus b**(n+1), namely as the b's complement of
1573     // the true value, and a "borrow" to the left should be remembered.
1574     //
1575     if (isNeg) {
1576       bool carry = true;  // true because b's complement is "complement + 1"
1577       for (uint32_t i = 0; i <= m+n; ++i) {
1578         u[i] = ~u[i] + carry; // b's complement
1579         carry = carry && u[i] == 0;
1580       }
1581     }
1582     DEBUG(cerr << "KnuthDiv: after complement:");
1583     DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1584     DEBUG(cerr << '\n');
1585
1586     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1587     // negative, go to step D6; otherwise go on to step D7.
1588     q[j] = (uint32_t)qp;
1589     if (isNeg) {
1590       // D6. [Add back]. The probability that this step is necessary is very
1591       // small, on the order of only 2/b. Make sure that test data accounts for
1592       // this possibility. Decrease q[j] by 1
1593       q[j]--;
1594       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1595       // A carry will occur to the left of u[j+n], and it should be ignored
1596       // since it cancels with the borrow that occurred in D4.
1597       bool carry = false;
1598       for (uint32_t i = 0; i < n; i++) {
1599         uint32_t limit = std::min(u[j+i],v[i]);
1600         u[j+i] += v[i] + carry;
1601         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1602       }
1603       u[j+n] += carry;
1604     }
1605     DEBUG(cerr << "KnuthDiv: after correction:");
1606     DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]);
1607     DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n');
1608
1609   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1610   } while (--j >= 0);
1611
1612   DEBUG(cerr << "KnuthDiv: quotient:");
1613   DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]);
1614   DEBUG(cerr << '\n');
1615
1616   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1617   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1618   // compute the remainder (urem uses this).
1619   if (r) {
1620     // The value d is expressed by the "shift" value above since we avoided
1621     // multiplication by d by using a shift left. So, all we have to do is
1622     // shift right here. In order to mak
1623     if (shift) {
1624       uint32_t carry = 0;
1625       DEBUG(cerr << "KnuthDiv: remainder:");
1626       for (int i = n-1; i >= 0; i--) {
1627         r[i] = (u[i] >> shift) | carry;
1628         carry = u[i] << (32 - shift);
1629         DEBUG(cerr << " " << r[i]);
1630       }
1631     } else {
1632       for (int i = n-1; i >= 0; i--) {
1633         r[i] = u[i];
1634         DEBUG(cerr << " " << r[i]);
1635       }
1636     }
1637     DEBUG(cerr << '\n');
1638   }
1639   DEBUG(cerr << std::setbase(10) << '\n');
1640 }
1641
1642 void APInt::divide(const APInt LHS, uint32_t lhsWords,
1643                    const APInt &RHS, uint32_t rhsWords,
1644                    APInt *Quotient, APInt *Remainder)
1645 {
1646   assert(lhsWords >= rhsWords && "Fractional result");
1647
1648   // First, compose the values into an array of 32-bit words instead of
1649   // 64-bit words. This is a necessity of both the "short division" algorithm
1650   // and the the Knuth "classical algorithm" which requires there to be native
1651   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1652   // can't use 64-bit operands here because we don't have native results of
1653   // 128-bits. Furthremore, casting the 64-bit values to 32-bit values won't
1654   // work on large-endian machines.
1655   uint64_t mask = ~0ull >> (sizeof(uint32_t)*8);
1656   uint32_t n = rhsWords * 2;
1657   uint32_t m = (lhsWords * 2) - n;
1658
1659   // Allocate space for the temporary values we need either on the stack, if
1660   // it will fit, or on the heap if it won't.
1661   uint32_t SPACE[128];
1662   uint32_t *U = 0;
1663   uint32_t *V = 0;
1664   uint32_t *Q = 0;
1665   uint32_t *R = 0;
1666   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1667     U = &SPACE[0];
1668     V = &SPACE[m+n+1];
1669     Q = &SPACE[(m+n+1) + n];
1670     if (Remainder)
1671       R = &SPACE[(m+n+1) + n + (m+n)];
1672   } else {
1673     U = new uint32_t[m + n + 1];
1674     V = new uint32_t[n];
1675     Q = new uint32_t[m+n];
1676     if (Remainder)
1677       R = new uint32_t[n];
1678   }
1679
1680   // Initialize the dividend
1681   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1682   for (unsigned i = 0; i < lhsWords; ++i) {
1683     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1684     U[i * 2] = (uint32_t)(tmp & mask);
1685     U[i * 2 + 1] = (uint32_t)(tmp >> (sizeof(uint32_t)*8));
1686   }
1687   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1688
1689   // Initialize the divisor
1690   memset(V, 0, (n)*sizeof(uint32_t));
1691   for (unsigned i = 0; i < rhsWords; ++i) {
1692     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1693     V[i * 2] = (uint32_t)(tmp & mask);
1694     V[i * 2 + 1] = (uint32_t)(tmp >> (sizeof(uint32_t)*8));
1695   }
1696
1697   // initialize the quotient and remainder
1698   memset(Q, 0, (m+n) * sizeof(uint32_t));
1699   if (Remainder)
1700     memset(R, 0, n * sizeof(uint32_t));
1701
1702   // Now, adjust m and n for the Knuth division. n is the number of words in
1703   // the divisor. m is the number of words by which the dividend exceeds the
1704   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1705   // contain any zero words or the Knuth algorithm fails.
1706   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1707     n--;
1708     m++;
1709   }
1710   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1711     m--;
1712
1713   // If we're left with only a single word for the divisor, Knuth doesn't work
1714   // so we implement the short division algorithm here. This is much simpler
1715   // and faster because we are certain that we can divide a 64-bit quantity
1716   // by a 32-bit quantity at hardware speed and short division is simply a
1717   // series of such operations. This is just like doing short division but we
1718   // are using base 2^32 instead of base 10.
1719   assert(n != 0 && "Divide by zero?");
1720   if (n == 1) {
1721     uint32_t divisor = V[0];
1722     uint32_t remainder = 0;
1723     for (int i = m+n-1; i >= 0; i--) {
1724       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1725       if (partial_dividend == 0) {
1726         Q[i] = 0;
1727         remainder = 0;
1728       } else if (partial_dividend < divisor) {
1729         Q[i] = 0;
1730         remainder = (uint32_t)partial_dividend;
1731       } else if (partial_dividend == divisor) {
1732         Q[i] = 1;
1733         remainder = 0;
1734       } else {
1735         Q[i] = (uint32_t)(partial_dividend / divisor);
1736         remainder = (uint32_t)(partial_dividend - (Q[i] * divisor));
1737       }
1738     }
1739     if (R)
1740       R[0] = remainder;
1741   } else {
1742     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1743     // case n > 1.
1744     KnuthDiv(U, V, Q, R, m, n);
1745   }
1746
1747   // If the caller wants the quotient
1748   if (Quotient) {
1749     // Set up the Quotient value's memory.
1750     if (Quotient->BitWidth != LHS.BitWidth) {
1751       if (Quotient->isSingleWord())
1752         Quotient->VAL = 0;
1753       else
1754         delete [] Quotient->pVal;
1755       Quotient->BitWidth = LHS.BitWidth;
1756       if (!Quotient->isSingleWord())
1757         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1758     } else
1759       Quotient->clear();
1760
1761     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1762     // order words.
1763     if (lhsWords == 1) {
1764       uint64_t tmp =
1765         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1766       if (Quotient->isSingleWord())
1767         Quotient->VAL = tmp;
1768       else
1769         Quotient->pVal[0] = tmp;
1770     } else {
1771       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1772       for (unsigned i = 0; i < lhsWords; ++i)
1773         Quotient->pVal[i] =
1774           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1775     }
1776   }
1777
1778   // If the caller wants the remainder
1779   if (Remainder) {
1780     // Set up the Remainder value's memory.
1781     if (Remainder->BitWidth != RHS.BitWidth) {
1782       if (Remainder->isSingleWord())
1783         Remainder->VAL = 0;
1784       else
1785         delete [] Remainder->pVal;
1786       Remainder->BitWidth = RHS.BitWidth;
1787       if (!Remainder->isSingleWord())
1788         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1789     } else
1790       Remainder->clear();
1791
1792     // The remainder is in R. Reconstitute the remainder into Remainder's low
1793     // order words.
1794     if (rhsWords == 1) {
1795       uint64_t tmp =
1796         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1797       if (Remainder->isSingleWord())
1798         Remainder->VAL = tmp;
1799       else
1800         Remainder->pVal[0] = tmp;
1801     } else {
1802       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1803       for (unsigned i = 0; i < rhsWords; ++i)
1804         Remainder->pVal[i] =
1805           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1806     }
1807   }
1808
1809   // Clean up the memory we allocated.
1810   if (U != &SPACE[0]) {
1811     delete [] U;
1812     delete [] V;
1813     delete [] Q;
1814     delete [] R;
1815   }
1816 }
1817
1818 APInt APInt::udiv(const APInt& RHS) const {
1819   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1820
1821   // First, deal with the easy case
1822   if (isSingleWord()) {
1823     assert(RHS.VAL != 0 && "Divide by zero?");
1824     return APInt(BitWidth, VAL / RHS.VAL);
1825   }
1826
1827   // Get some facts about the LHS and RHS number of bits and words
1828   uint32_t rhsBits = RHS.getActiveBits();
1829   uint32_t rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1830   assert(rhsWords && "Divided by zero???");
1831   uint32_t lhsBits = this->getActiveBits();
1832   uint32_t lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1833
1834   // Deal with some degenerate cases
1835   if (!lhsWords)
1836     // 0 / X ===> 0
1837     return APInt(BitWidth, 0);
1838   else if (lhsWords < rhsWords || this->ult(RHS)) {
1839     // X / Y ===> 0, iff X < Y
1840     return APInt(BitWidth, 0);
1841   } else if (*this == RHS) {
1842     // X / X ===> 1
1843     return APInt(BitWidth, 1);
1844   } else if (lhsWords == 1 && rhsWords == 1) {
1845     // All high words are zero, just use native divide
1846     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1847   }
1848
1849   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1850   APInt Quotient(1,0); // to hold result.
1851   divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1852   return Quotient;
1853 }
1854
1855 APInt APInt::urem(const APInt& RHS) const {
1856   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1857   if (isSingleWord()) {
1858     assert(RHS.VAL != 0 && "Remainder by zero?");
1859     return APInt(BitWidth, VAL % RHS.VAL);
1860   }
1861
1862   // Get some facts about the LHS
1863   uint32_t lhsBits = getActiveBits();
1864   uint32_t lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1865
1866   // Get some facts about the RHS
1867   uint32_t rhsBits = RHS.getActiveBits();
1868   uint32_t rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1869   assert(rhsWords && "Performing remainder operation by zero ???");
1870
1871   // Check the degenerate cases
1872   if (lhsWords == 0) {
1873     // 0 % Y ===> 0
1874     return APInt(BitWidth, 0);
1875   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1876     // X % Y ===> X, iff X < Y
1877     return *this;
1878   } else if (*this == RHS) {
1879     // X % X == 0;
1880     return APInt(BitWidth, 0);
1881   } else if (lhsWords == 1) {
1882     // All high words are zero, just use native remainder
1883     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1884   }
1885
1886   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1887   APInt Remainder(1,0);
1888   divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1889   return Remainder;
1890 }
1891
1892 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1893                     APInt &Quotient, APInt &Remainder) {
1894   // Get some size facts about the dividend and divisor
1895   uint32_t lhsBits  = LHS.getActiveBits();
1896   uint32_t lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1897   uint32_t rhsBits  = RHS.getActiveBits();
1898   uint32_t rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1899
1900   // Check the degenerate cases
1901   if (lhsWords == 0) {
1902     Quotient = 0;                // 0 / Y ===> 0
1903     Remainder = 0;               // 0 % Y ===> 0
1904     return;
1905   }
1906
1907   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1908     Quotient = 0;               // X / Y ===> 0, iff X < Y
1909     Remainder = LHS;            // X % Y ===> X, iff X < Y
1910     return;
1911   }
1912
1913   if (LHS == RHS) {
1914     Quotient  = 1;              // X / X ===> 1
1915     Remainder = 0;              // X % X ===> 0;
1916     return;
1917   }
1918
1919   if (lhsWords == 1 && rhsWords == 1) {
1920     // There is only one word to consider so use the native versions.
1921     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1922     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1923     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1924     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1925     return;
1926   }
1927
1928   // Okay, lets do it the long way
1929   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
1930 }
1931
1932 void APInt::fromString(uint32_t numbits, const char *str, uint32_t slen,
1933                        uint8_t radix) {
1934   // Check our assumptions here
1935   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
1936          "Radix should be 2, 8, 10, or 16!");
1937   assert(str && "String is null?");
1938   bool isNeg = str[0] == '-';
1939   if (isNeg)
1940     str++, slen--;
1941   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1942   assert((slen*3 <= numbits || radix != 8) && "Insufficient bit width");
1943   assert((slen*4 <= numbits || radix != 16) && "Insufficient bit width");
1944   assert(((slen*64)/22 <= numbits || radix != 10) && "Insufficient bit width");
1945
1946   // Allocate memory
1947   if (!isSingleWord())
1948     pVal = getClearedMemory(getNumWords());
1949
1950   // Figure out if we can shift instead of multiply
1951   uint32_t shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1952
1953   // Set up an APInt for the digit to add outside the loop so we don't
1954   // constantly construct/destruct it.
1955   APInt apdigit(getBitWidth(), 0);
1956   APInt apradix(getBitWidth(), radix);
1957
1958   // Enter digit traversal loop
1959   for (unsigned i = 0; i < slen; i++) {
1960     // Get a digit
1961     uint32_t digit = 0;
1962     char cdigit = str[i];
1963     if (radix == 16) {
1964       if (!isxdigit(cdigit))
1965         assert(0 && "Invalid hex digit in string");
1966       if (isdigit(cdigit))
1967         digit = cdigit - '0';
1968       else if (cdigit >= 'a')
1969         digit = cdigit - 'a' + 10;
1970       else if (cdigit >= 'A')
1971         digit = cdigit - 'A' + 10;
1972       else
1973         assert(0 && "huh? we shouldn't get here");
1974     } else if (isdigit(cdigit)) {
1975       digit = cdigit - '0';
1976       assert((radix == 10 ||
1977               (radix == 8 && digit != 8 && digit != 9) ||
1978               (radix == 2 && (digit == 0 || digit == 1))) &&
1979              "Invalid digit in string for given radix");
1980     } else {
1981       assert(0 && "Invalid character in digit string");
1982     }
1983
1984     // Shift or multiply the value by the radix
1985     if (shift)
1986       *this <<= shift;
1987     else
1988       *this *= apradix;
1989
1990     // Add in the digit we just interpreted
1991     if (apdigit.isSingleWord())
1992       apdigit.VAL = digit;
1993     else
1994       apdigit.pVal[0] = digit;
1995     *this += apdigit;
1996   }
1997   // If its negative, put it in two's complement form
1998   if (isNeg) {
1999     (*this)--;
2000     this->flip();
2001   }
2002 }
2003
2004 std::string APInt::toString(uint8_t radix, bool wantSigned) const {
2005   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2006          "Radix should be 2, 8, 10, or 16!");
2007   static const char *const digits[] = {
2008     "0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F"
2009   };
2010   std::string result;
2011   uint32_t bits_used = getActiveBits();
2012   if (isSingleWord()) {
2013     char buf[65];
2014     const char *format = (radix == 10 ? (wantSigned ? "%lld" : "%llu") :
2015        (radix == 16 ? "%llX" : (radix == 8 ? "%llo" : 0)));
2016     if (format) {
2017       if (wantSigned) {
2018         int64_t sextVal = (int64_t(VAL) << (APINT_BITS_PER_WORD-BitWidth)) >>
2019                            (APINT_BITS_PER_WORD-BitWidth);
2020         sprintf(buf, format, sextVal);
2021       } else
2022         sprintf(buf, format, VAL);
2023     } else {
2024       memset(buf, 0, 65);
2025       uint64_t v = VAL;
2026       while (bits_used) {
2027         uint32_t bit = (uint32_t)v & 1;
2028         bits_used--;
2029         buf[bits_used] = digits[bit][0];
2030         v >>=1;
2031       }
2032     }
2033     result = buf;
2034     return result;
2035   }
2036
2037   if (radix != 10) {
2038     // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2039     // because the number of bits per digit (1,3 and 4 respectively) divides
2040     // equaly. We just shift until there value is zero.
2041
2042     // First, check for a zero value and just short circuit the logic below.
2043     if (*this == 0)
2044       result = "0";
2045     else {
2046       APInt tmp(*this);
2047       size_t insert_at = 0;
2048       if (wantSigned && this->isNegative()) {
2049         // They want to print the signed version and it is a negative value
2050         // Flip the bits and add one to turn it into the equivalent positive
2051         // value and put a '-' in the result.
2052         tmp.flip();
2053         tmp++;
2054         result = "-";
2055         insert_at = 1;
2056       }
2057       // Just shift tmp right for each digit width until it becomes zero
2058       uint32_t shift = (radix == 16 ? 4 : (radix == 8 ? 3 : 1));
2059       uint64_t mask = radix - 1;
2060       APInt zero(tmp.getBitWidth(), 0);
2061       while (tmp.ne(zero)) {
2062         unsigned digit =
2063           (unsigned)((tmp.isSingleWord() ? tmp.VAL : tmp.pVal[0]) & mask);
2064         result.insert(insert_at, digits[digit]);
2065         tmp = tmp.lshr(shift);
2066       }
2067     }
2068     return result;
2069   }
2070
2071   APInt tmp(*this);
2072   APInt divisor(4, radix);
2073   APInt zero(tmp.getBitWidth(), 0);
2074   size_t insert_at = 0;
2075   if (wantSigned && tmp[BitWidth-1]) {
2076     // They want to print the signed version and it is a negative value
2077     // Flip the bits and add one to turn it into the equivalent positive
2078     // value and put a '-' in the result.
2079     tmp.flip();
2080     tmp++;
2081     result = "-";
2082     insert_at = 1;
2083   }
2084   if (tmp == zero)
2085     result = "0";
2086   else while (tmp.ne(zero)) {
2087     APInt APdigit(1,0);
2088     APInt tmp2(tmp.getBitWidth(), 0);
2089     divide(tmp, tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2090            &APdigit);
2091     uint32_t digit = (uint32_t)APdigit.getZExtValue();
2092     assert(digit < radix && "divide failed");
2093     result.insert(insert_at,digits[digit]);
2094     tmp = tmp2;
2095   }
2096
2097   return result;
2098 }
2099
2100 void APInt::dump() const
2101 {
2102   cerr << "APInt(" << BitWidth << ")=" << std::setbase(16);
2103   if (isSingleWord())
2104     cerr << VAL;
2105   else for (unsigned i = getNumWords(); i > 0; i--) {
2106     cerr << pVal[i-1] << " ";
2107   }
2108   cerr << " U(" << this->toStringUnsigned(10) << ") S("
2109        << this->toStringSigned(10) << ")" << std::setbase(10);
2110 }
2111
2112 // This implements a variety of operations on a representation of
2113 // arbitrary precision, two's-complement, bignum integer values.
2114
2115 /* Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2116    and unrestricting assumption.  */
2117 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2118 COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2119
2120 /* Some handy functions local to this file.  */
2121 namespace {
2122
2123   /* Returns the integer part with the least significant BITS set.
2124      BITS cannot be zero.  */
2125   static inline integerPart
2126   lowBitMask(unsigned int bits)
2127   {
2128     assert (bits != 0 && bits <= integerPartWidth);
2129
2130     return ~(integerPart) 0 >> (integerPartWidth - bits);
2131   }
2132
2133   /* Returns the value of the lower half of PART.  */
2134   static inline integerPart
2135   lowHalf(integerPart part)
2136   {
2137     return part & lowBitMask(integerPartWidth / 2);
2138   }
2139
2140   /* Returns the value of the upper half of PART.  */
2141   static inline integerPart
2142   highHalf(integerPart part)
2143   {
2144     return part >> (integerPartWidth / 2);
2145   }
2146
2147   /* Returns the bit number of the most significant set bit of a part.
2148      If the input number has no bits set -1U is returned.  */
2149   static unsigned int
2150   partMSB(integerPart value)
2151   {
2152     unsigned int n, msb;
2153
2154     if (value == 0)
2155       return -1U;
2156
2157     n = integerPartWidth / 2;
2158
2159     msb = 0;
2160     do {
2161       if (value >> n) {
2162         value >>= n;
2163         msb += n;
2164       }
2165
2166       n >>= 1;
2167     } while (n);
2168
2169     return msb;
2170   }
2171
2172   /* Returns the bit number of the least significant set bit of a
2173      part.  If the input number has no bits set -1U is returned.  */
2174   static unsigned int
2175   partLSB(integerPart value)
2176   {
2177     unsigned int n, lsb;
2178
2179     if (value == 0)
2180       return -1U;
2181
2182     lsb = integerPartWidth - 1;
2183     n = integerPartWidth / 2;
2184
2185     do {
2186       if (value << n) {
2187         value <<= n;
2188         lsb -= n;
2189       }
2190
2191       n >>= 1;
2192     } while (n);
2193
2194     return lsb;
2195   }
2196 }
2197
2198 /* Sets the least significant part of a bignum to the input value, and
2199    zeroes out higher parts.  */
2200 void
2201 APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2202 {
2203   unsigned int i;
2204
2205   assert (parts > 0);
2206
2207   dst[0] = part;
2208   for(i = 1; i < parts; i++)
2209     dst[i] = 0;
2210 }
2211
2212 /* Assign one bignum to another.  */
2213 void
2214 APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2215 {
2216   unsigned int i;
2217
2218   for(i = 0; i < parts; i++)
2219     dst[i] = src[i];
2220 }
2221
2222 /* Returns true if a bignum is zero, false otherwise.  */
2223 bool
2224 APInt::tcIsZero(const integerPart *src, unsigned int parts)
2225 {
2226   unsigned int i;
2227
2228   for(i = 0; i < parts; i++)
2229     if (src[i])
2230       return false;
2231
2232   return true;
2233 }
2234
2235 /* Extract the given bit of a bignum; returns 0 or 1.  */
2236 int
2237 APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2238 {
2239   return(parts[bit / integerPartWidth]
2240          & ((integerPart) 1 << bit % integerPartWidth)) != 0;
2241 }
2242
2243 /* Set the given bit of a bignum.  */
2244 void
2245 APInt::tcSetBit(integerPart *parts, unsigned int bit)
2246 {
2247   parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2248 }
2249
2250 /* Returns the bit number of the least significant set bit of a
2251    number.  If the input number has no bits set -1U is returned.  */
2252 unsigned int
2253 APInt::tcLSB(const integerPart *parts, unsigned int n)
2254 {
2255   unsigned int i, lsb;
2256
2257   for(i = 0; i < n; i++) {
2258       if (parts[i] != 0) {
2259           lsb = partLSB(parts[i]);
2260
2261           return lsb + i * integerPartWidth;
2262       }
2263   }
2264
2265   return -1U;
2266 }
2267
2268 /* Returns the bit number of the most significant set bit of a number.
2269    If the input number has no bits set -1U is returned.  */
2270 unsigned int
2271 APInt::tcMSB(const integerPart *parts, unsigned int n)
2272 {
2273   unsigned int msb;
2274
2275   do {
2276       --n;
2277
2278       if (parts[n] != 0) {
2279           msb = partMSB(parts[n]);
2280
2281           return msb + n * integerPartWidth;
2282       }
2283   } while (n);
2284
2285   return -1U;
2286 }
2287
2288 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2289    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2290    the least significant bit of DST.  All high bits above srcBITS in
2291    DST are zero-filled.  */
2292 void
2293 APInt::tcExtract(integerPart *dst, unsigned int dstCount, const integerPart *src,
2294                  unsigned int srcBits, unsigned int srcLSB)
2295 {
2296   unsigned int firstSrcPart, dstParts, shift, n;
2297
2298   dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2299   assert (dstParts <= dstCount);
2300
2301   firstSrcPart = srcLSB / integerPartWidth;
2302   tcAssign (dst, src + firstSrcPart, dstParts);
2303
2304   shift = srcLSB % integerPartWidth;
2305   tcShiftRight (dst, dstParts, shift);
2306
2307   /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2308      in DST.  If this is less that srcBits, append the rest, else
2309      clear the high bits.  */
2310   n = dstParts * integerPartWidth - shift;
2311   if (n < srcBits) {
2312     integerPart mask = lowBitMask (srcBits - n);
2313     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2314                           << n % integerPartWidth);
2315   } else if (n > srcBits) {
2316     if (srcBits % integerPartWidth)
2317       dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2318   }
2319
2320   /* Clear high parts.  */
2321   while (dstParts < dstCount)
2322     dst[dstParts++] = 0;
2323 }
2324
2325 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2326 integerPart
2327 APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2328              integerPart c, unsigned int parts)
2329 {
2330   unsigned int i;
2331
2332   assert(c <= 1);
2333
2334   for(i = 0; i < parts; i++) {
2335     integerPart l;
2336
2337     l = dst[i];
2338     if (c) {
2339       dst[i] += rhs[i] + 1;
2340       c = (dst[i] <= l);
2341     } else {
2342       dst[i] += rhs[i];
2343       c = (dst[i] < l);
2344     }
2345   }
2346
2347   return c;
2348 }
2349
2350 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2351 integerPart
2352 APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2353                   integerPart c, unsigned int parts)
2354 {
2355   unsigned int i;
2356
2357   assert(c <= 1);
2358
2359   for(i = 0; i < parts; i++) {
2360     integerPart l;
2361
2362     l = dst[i];
2363     if (c) {
2364       dst[i] -= rhs[i] + 1;
2365       c = (dst[i] >= l);
2366     } else {
2367       dst[i] -= rhs[i];
2368       c = (dst[i] > l);
2369     }
2370   }
2371
2372   return c;
2373 }
2374
2375 /* Negate a bignum in-place.  */
2376 void
2377 APInt::tcNegate(integerPart *dst, unsigned int parts)
2378 {
2379   tcComplement(dst, parts);
2380   tcIncrement(dst, parts);
2381 }
2382
2383 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2384     DST  = SRC * MULTIPLIER + CARRY   if add is false
2385
2386     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2387     they must start at the same point, i.e. DST == SRC.
2388
2389     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2390     returned.  Otherwise DST is filled with the least significant
2391     DSTPARTS parts of the result, and if all of the omitted higher
2392     parts were zero return zero, otherwise overflow occurred and
2393     return one.  */
2394 int
2395 APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2396                       integerPart multiplier, integerPart carry,
2397                       unsigned int srcParts, unsigned int dstParts,
2398                       bool add)
2399 {
2400   unsigned int i, n;
2401
2402   /* Otherwise our writes of DST kill our later reads of SRC.  */
2403   assert(dst <= src || dst >= src + srcParts);
2404   assert(dstParts <= srcParts + 1);
2405
2406   /* N loops; minimum of dstParts and srcParts.  */
2407   n = dstParts < srcParts ? dstParts: srcParts;
2408
2409   for(i = 0; i < n; i++) {
2410     integerPart low, mid, high, srcPart;
2411
2412       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2413
2414          This cannot overflow, because
2415
2416          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2417
2418          which is less than n^2.  */
2419
2420     srcPart = src[i];
2421
2422     if (multiplier == 0 || srcPart == 0)        {
2423       low = carry;
2424       high = 0;
2425     } else {
2426       low = lowHalf(srcPart) * lowHalf(multiplier);
2427       high = highHalf(srcPart) * highHalf(multiplier);
2428
2429       mid = lowHalf(srcPart) * highHalf(multiplier);
2430       high += highHalf(mid);
2431       mid <<= integerPartWidth / 2;
2432       if (low + mid < low)
2433         high++;
2434       low += mid;
2435
2436       mid = highHalf(srcPart) * lowHalf(multiplier);
2437       high += highHalf(mid);
2438       mid <<= integerPartWidth / 2;
2439       if (low + mid < low)
2440         high++;
2441       low += mid;
2442
2443       /* Now add carry.  */
2444       if (low + carry < low)
2445         high++;
2446       low += carry;
2447     }
2448
2449     if (add) {
2450       /* And now DST[i], and store the new low part there.  */
2451       if (low + dst[i] < low)
2452         high++;
2453       dst[i] += low;
2454     } else
2455       dst[i] = low;
2456
2457     carry = high;
2458   }
2459
2460   if (i < dstParts) {
2461     /* Full multiplication, there is no overflow.  */
2462     assert(i + 1 == dstParts);
2463     dst[i] = carry;
2464     return 0;
2465   } else {
2466     /* We overflowed if there is carry.  */
2467     if (carry)
2468       return 1;
2469
2470     /* We would overflow if any significant unwritten parts would be
2471        non-zero.  This is true if any remaining src parts are non-zero
2472        and the multiplier is non-zero.  */
2473     if (multiplier)
2474       for(; i < srcParts; i++)
2475         if (src[i])
2476           return 1;
2477
2478     /* We fitted in the narrow destination.  */
2479     return 0;
2480   }
2481 }
2482
2483 /* DST = LHS * RHS, where DST has the same width as the operands and
2484    is filled with the least significant parts of the result.  Returns
2485    one if overflow occurred, otherwise zero.  DST must be disjoint
2486    from both operands.  */
2487 int
2488 APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2489                   const integerPart *rhs, unsigned int parts)
2490 {
2491   unsigned int i;
2492   int overflow;
2493
2494   assert(dst != lhs && dst != rhs);
2495
2496   overflow = 0;
2497   tcSet(dst, 0, parts);
2498
2499   for(i = 0; i < parts; i++)
2500     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2501                                parts - i, true);
2502
2503   return overflow;
2504 }
2505
2506 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2507    operands.  No overflow occurs.  DST must be disjoint from both
2508    operands.  Returns the number of parts required to hold the
2509    result.  */
2510 unsigned int
2511 APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2512                       const integerPart *rhs, unsigned int lhsParts,
2513                       unsigned int rhsParts)
2514 {
2515   /* Put the narrower number on the LHS for less loops below.  */
2516   if (lhsParts > rhsParts) {
2517     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2518   } else {
2519     unsigned int n;
2520
2521     assert(dst != lhs && dst != rhs);
2522
2523     tcSet(dst, 0, rhsParts);
2524
2525     for(n = 0; n < lhsParts; n++)
2526       tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2527
2528     n = lhsParts + rhsParts;
2529
2530     return n - (dst[n - 1] == 0);
2531   }
2532 }
2533
2534 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2535    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2536    set REMAINDER to the remainder, return zero.  i.e.
2537
2538    OLD_LHS = RHS * LHS + REMAINDER
2539
2540    SCRATCH is a bignum of the same size as the operands and result for
2541    use by the routine; its contents need not be initialized and are
2542    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2543 */
2544 int
2545 APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2546                 integerPart *remainder, integerPart *srhs,
2547                 unsigned int parts)
2548 {
2549   unsigned int n, shiftCount;
2550   integerPart mask;
2551
2552   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2553
2554   shiftCount = tcMSB(rhs, parts) + 1;
2555   if (shiftCount == 0)
2556     return true;
2557
2558   shiftCount = parts * integerPartWidth - shiftCount;
2559   n = shiftCount / integerPartWidth;
2560   mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2561
2562   tcAssign(srhs, rhs, parts);
2563   tcShiftLeft(srhs, parts, shiftCount);
2564   tcAssign(remainder, lhs, parts);
2565   tcSet(lhs, 0, parts);
2566
2567   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2568      the total.  */
2569   for(;;) {
2570       int compare;
2571
2572       compare = tcCompare(remainder, srhs, parts);
2573       if (compare >= 0) {
2574         tcSubtract(remainder, srhs, 0, parts);
2575         lhs[n] |= mask;
2576       }
2577
2578       if (shiftCount == 0)
2579         break;
2580       shiftCount--;
2581       tcShiftRight(srhs, parts, 1);
2582       if ((mask >>= 1) == 0)
2583         mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2584   }
2585
2586   return false;
2587 }
2588
2589 /* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2590    There are no restrictions on COUNT.  */
2591 void
2592 APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2593 {
2594   if (count) {
2595     unsigned int jump, shift;
2596
2597     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2598     jump = count / integerPartWidth;
2599     shift = count % integerPartWidth;
2600
2601     while (parts > jump) {
2602       integerPart part;
2603
2604       parts--;
2605
2606       /* dst[i] comes from the two parts src[i - jump] and, if we have
2607          an intra-part shift, src[i - jump - 1].  */
2608       part = dst[parts - jump];
2609       if (shift) {
2610         part <<= shift;
2611         if (parts >= jump + 1)
2612           part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2613       }
2614
2615       dst[parts] = part;
2616     }
2617
2618     while (parts > 0)
2619       dst[--parts] = 0;
2620   }
2621 }
2622
2623 /* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2624    zero.  There are no restrictions on COUNT.  */
2625 void
2626 APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2627 {
2628   if (count) {
2629     unsigned int i, jump, shift;
2630
2631     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2632     jump = count / integerPartWidth;
2633     shift = count % integerPartWidth;
2634
2635     /* Perform the shift.  This leaves the most significant COUNT bits
2636        of the result at zero.  */
2637     for(i = 0; i < parts; i++) {
2638       integerPart part;
2639
2640       if (i + jump >= parts) {
2641         part = 0;
2642       } else {
2643         part = dst[i + jump];
2644         if (shift) {
2645           part >>= shift;
2646           if (i + jump + 1 < parts)
2647             part |= dst[i + jump + 1] << (integerPartWidth - shift);
2648         }
2649       }
2650
2651       dst[i] = part;
2652     }
2653   }
2654 }
2655
2656 /* Bitwise and of two bignums.  */
2657 void
2658 APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2659 {
2660   unsigned int i;
2661
2662   for(i = 0; i < parts; i++)
2663     dst[i] &= rhs[i];
2664 }
2665
2666 /* Bitwise inclusive or of two bignums.  */
2667 void
2668 APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2669 {
2670   unsigned int i;
2671
2672   for(i = 0; i < parts; i++)
2673     dst[i] |= rhs[i];
2674 }
2675
2676 /* Bitwise exclusive or of two bignums.  */
2677 void
2678 APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2679 {
2680   unsigned int i;
2681
2682   for(i = 0; i < parts; i++)
2683     dst[i] ^= rhs[i];
2684 }
2685
2686 /* Complement a bignum in-place.  */
2687 void
2688 APInt::tcComplement(integerPart *dst, unsigned int parts)
2689 {
2690   unsigned int i;
2691
2692   for(i = 0; i < parts; i++)
2693     dst[i] = ~dst[i];
2694 }
2695
2696 /* Comparison (unsigned) of two bignums.  */
2697 int
2698 APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2699                  unsigned int parts)
2700 {
2701   while (parts) {
2702       parts--;
2703       if (lhs[parts] == rhs[parts])
2704         continue;
2705
2706       if (lhs[parts] > rhs[parts])
2707         return 1;
2708       else
2709         return -1;
2710     }
2711
2712   return 0;
2713 }
2714
2715 /* Increment a bignum in-place, return the carry flag.  */
2716 integerPart
2717 APInt::tcIncrement(integerPart *dst, unsigned int parts)
2718 {
2719   unsigned int i;
2720
2721   for(i = 0; i < parts; i++)
2722     if (++dst[i] != 0)
2723       break;
2724
2725   return i == parts;
2726 }
2727
2728 /* Set the least significant BITS bits of a bignum, clear the
2729    rest.  */
2730 void
2731 APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2732                                  unsigned int bits)
2733 {
2734   unsigned int i;
2735
2736   i = 0;
2737   while (bits > integerPartWidth) {
2738     dst[i++] = ~(integerPart) 0;
2739     bits -= integerPartWidth;
2740   }
2741
2742   if (bits)
2743     dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2744
2745   while (i < parts)
2746     dst[i++] = 0;
2747 }