lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "apint"
  16 #include "llvm/ADT/APInt.h"
  17 #include "llvm/ADT/FoldingSet.h"
  18 #include "llvm/ADT/SmallString.h"
  19 #include "llvm/Support/Debug.h"
  20 #include "llvm/Support/MathExtras.h"
  21 #include <cmath>
  22 #include <limits>
  23 #include <cstring>
  24 #include <cstdlib>
  25 using namespace llvm;
  26
  27 /// This enumeration just provides for internal constants used in this
  28 /// translation unit.
  29 enum {
  30   MIN_INT_BITS = 1,        ///< Minimum number of bits that can be specified
  31   ///< Note that this must remain synchronized with IntegerType::MIN_INT_BITS
  32   MAX_INT_BITS = (1<<23)-1 ///< Maximum number of bits that can be specified
  33   ///< Note that this must remain synchronized with IntegerType::MAX_INT_BITS
  34 };
  35
  36 /// A utility function for allocating memory, checking for allocation failures,
  37 /// and ensuring the contents are zeroed.
  38 inline static uint64_t* getClearedMemory(uint32_t numWords) {
  39   uint64_t * result = new uint64_t[numWords];
  40   assert(result && "APInt memory allocation fails!");
  41   memset(result, 0, numWords * sizeof(uint64_t));
  42   return result;
  43 }
  44
  45 /// A utility function for allocating memory and checking for allocation
  46 /// failure.  The content is not zeroed.
  47 inline static uint64_t* getMemory(uint32_t numWords) {
  48   uint64_t * result = new uint64_t[numWords];
  49   assert(result && "APInt memory allocation fails!");
  50   return result;
  51 }
  52
  53 APInt::APInt(uint32_t numBits, uint64_t val, bool isSigned)
  54   : BitWidth(numBits), VAL(0) {
  55   assert(BitWidth >= MIN_INT_BITS && "bitwidth too small");
  56   assert(BitWidth <= MAX_INT_BITS && "bitwidth too large");
  57   if (isSingleWord())
  58     VAL = val;
  59   else {
  60     pVal = getClearedMemory(getNumWords());
  61     pVal[0] = val;
  62     if (isSigned && int64_t(val) < 0)
  63       for (unsigned i = 1; i < getNumWords(); ++i)
  64         pVal[i] = -1ULL;
  65   }
  66   clearUnusedBits();
  67 }
  68
  69 APInt::APInt(uint32_t numBits, uint32_t numWords, const uint64_t bigVal[])
  70   : BitWidth(numBits), VAL(0)  {
  71   assert(BitWidth >= MIN_INT_BITS && "bitwidth too small");
  72   assert(BitWidth <= MAX_INT_BITS && "bitwidth too large");
  73   assert(bigVal && "Null pointer detected!");
  74   if (isSingleWord())
  75     VAL = bigVal[0];
  76   else {
  77     // Get memory, cleared to 0
  78     pVal = getClearedMemory(getNumWords());
  79     // Calculate the number of words to copy
  80     uint32_t words = std::min<uint32_t>(numWords, getNumWords());
  81     // Copy the words from bigVal to pVal
  82     memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
  83   }
  84   // Make sure unused high bits are cleared
  85   clearUnusedBits();
  86 }
  87
  88 APInt::APInt(uint32_t numbits, const char StrStart[], uint32_t slen,
  89              uint8_t radix)
  90   : BitWidth(numbits), VAL(0) {
  91   assert(BitWidth >= MIN_INT_BITS && "bitwidth too small");
  92   assert(BitWidth <= MAX_INT_BITS && "bitwidth too large");
  93   fromString(numbits, StrStart, slen, radix);
  94 }
  95
  96 APInt::APInt(const APInt& that)
  97   : BitWidth(that.BitWidth), VAL(0) {
  98   assert(BitWidth >= MIN_INT_BITS && "bitwidth too small");
  99   assert(BitWidth <= MAX_INT_BITS && "bitwidth too large");
 100   if (isSingleWord())
 101     VAL = that.VAL;
 102   else {
 103     pVal = getMemory(getNumWords());
 104     memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
 105   }
 106 }
 107
 108 APInt::~APInt() {
 109   if (!isSingleWord())
 110     delete [] pVal;
 111 }
 112
 113 APInt& APInt::operator=(const APInt& RHS) {
 114   // Don't do anything for X = X
 115   if (this == &RHS)
 116     return *this;
 117
 118   // If the bitwidths are the same, we can avoid mucking with memory
 119   if (BitWidth == RHS.getBitWidth()) {
 120     if (isSingleWord())
 121       VAL = RHS.VAL;
 122     else
 123       memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
 124     return *this;
 125   }
 126
 127   if (isSingleWord())
 128     if (RHS.isSingleWord())
 129       VAL = RHS.VAL;
 130     else {
 131       VAL = 0;
 132       pVal = getMemory(RHS.getNumWords());
 133       memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 134     }
 135   else if (getNumWords() == RHS.getNumWords())
 136     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 137   else if (RHS.isSingleWord()) {
 138     delete [] pVal;
 139     VAL = RHS.VAL;
 140   } else {
 141     delete [] pVal;
 142     pVal = getMemory(RHS.getNumWords());
 143     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 144   }
 145   BitWidth = RHS.BitWidth;
 146   return clearUnusedBits();
 147 }
 148
 149 APInt& APInt::operator=(uint64_t RHS) {
 150   if (isSingleWord())
 151     VAL = RHS;
 152   else {
 153     pVal[0] = RHS;
 154     memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
 155   }
 156   return clearUnusedBits();
 157 }
 158
 159 /// Profile - This method 'profiles' an APInt for use with FoldingSet.
 160 void APInt::Profile(FoldingSetNodeID& ID) const {
 161   ID.AddInteger(BitWidth);
 162
 163   if (isSingleWord()) {
 164     ID.AddInteger(VAL);
 165     return;
 166   }
 167
 168   uint32_t NumWords = getNumWords();
 169   for (unsigned i = 0; i < NumWords; ++i)
 170     ID.AddInteger(pVal[i]);
 171 }
 172
 173 /// add_1 - This function adds a single "digit" integer, y, to the multiple
 174 /// "digit" integer array,  x[]. x[] is modified to reflect the addition and
 175 /// 1 is returned if there is a carry out, otherwise 0 is returned.
 176 /// @returns the carry of the addition.
 177 static bool add_1(uint64_t dest[], uint64_t x[], uint32_t len, uint64_t y) {
 178   for (uint32_t i = 0; i < len; ++i) {
 179     dest[i] = y + x[i];
 180     if (dest[i] < y)
 181       y = 1; // Carry one to next digit.
 182     else {
 183       y = 0; // No need to carry so exit early
 184       break;
 185     }
 186   }
 187   return y;
 188 }
 189
 190 /// @brief Prefix increment operator. Increments the APInt by one.
 191 APInt& APInt::operator++() {
 192   if (isSingleWord())
 193     ++VAL;
 194   else
 195     add_1(pVal, pVal, getNumWords(), 1);
 196   return clearUnusedBits();
 197 }
 198
 199 /// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
 200 /// the multi-digit integer array, x[], propagating the borrowed 1 value until
 201 /// no further borrowing is neeeded or it runs out of "digits" in x.  The result
 202 /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
 203 /// In other words, if y > x then this function returns 1, otherwise 0.
 204 /// @returns the borrow out of the subtraction
 205 static bool sub_1(uint64_t x[], uint32_t len, uint64_t y) {
 206   for (uint32_t i = 0; i < len; ++i) {
 207     uint64_t X = x[i];
 208     x[i] -= y;
 209     if (y > X)
 210       y = 1;  // We have to "borrow 1" from next "digit"
 211     else {
 212       y = 0;  // No need to borrow
 213       break;  // Remaining digits are unchanged so exit early
 214     }
 215   }
 216   return bool(y);
 217 }
 218
 219 /// @brief Prefix decrement operator. Decrements the APInt by one.
 220 APInt& APInt::operator--() {
 221   if (isSingleWord())
 222     --VAL;
 223   else
 224     sub_1(pVal, getNumWords(), 1);
 225   return clearUnusedBits();
 226 }
 227
 228 /// add - This function adds the integer array x to the integer array Y and
 229 /// places the result in dest.
 230 /// @returns the carry out from the addition
 231 /// @brief General addition of 64-bit integer arrays
 232 static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 233                 uint32_t len) {
 234   bool carry = false;
 235   for (uint32_t i = 0; i< len; ++i) {
 236     uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
 237     dest[i] = x[i] + y[i] + carry;
 238     carry = dest[i] < limit || (carry && dest[i] == limit);
 239   }
 240   return carry;
 241 }
 242
 243 /// Adds the RHS APint to this APInt.
 244 /// @returns this, after addition of RHS.
 245 /// @brief Addition assignment operator.
 246 APInt& APInt::operator+=(const APInt& RHS) {
 247   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 248   if (isSingleWord())
 249     VAL += RHS.VAL;
 250   else {
 251     add(pVal, pVal, RHS.pVal, getNumWords());
 252   }
 253   return clearUnusedBits();
 254 }
 255
 256 /// Subtracts the integer array y from the integer array x
 257 /// @returns returns the borrow out.
 258 /// @brief Generalized subtraction of 64-bit integer arrays.
 259 static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 260                 uint32_t len) {
 261   bool borrow = false;
 262   for (uint32_t i = 0; i < len; ++i) {
 263     uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
 264     borrow = y[i] > x_tmp || (borrow && x[i] == 0);
 265     dest[i] = x_tmp - y[i];
 266   }
 267   return borrow;
 268 }
 269
 270 /// Subtracts the RHS APInt from this APInt
 271 /// @returns this, after subtraction
 272 /// @brief Subtraction assignment operator.
 273 APInt& APInt::operator-=(const APInt& RHS) {
 274   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 275   if (isSingleWord())
 276     VAL -= RHS.VAL;
 277   else
 278     sub(pVal, pVal, RHS.pVal, getNumWords());
 279   return clearUnusedBits();
 280 }
 281
 282 /// Multiplies an integer array, x by a a uint64_t integer and places the result
 283 /// into dest.
 284 /// @returns the carry out of the multiplication.
 285 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
 286 static uint64_t mul_1(uint64_t dest[], uint64_t x[], uint32_t len, uint64_t y) {
 287   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
 288   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
 289   uint64_t carry = 0;
 290
 291   // For each digit of x.
 292   for (uint32_t i = 0; i < len; ++i) {
 293     // Split x into high and low words
 294     uint64_t lx = x[i] & 0xffffffffULL;
 295     uint64_t hx = x[i] >> 32;
 296     // hasCarry - A flag to indicate if there is a carry to the next digit.
 297     // hasCarry == 0, no carry
 298     // hasCarry == 1, has carry
 299     // hasCarry == 2, no carry and the calculation result == 0.
 300     uint8_t hasCarry = 0;
 301     dest[i] = carry + lx * ly;
 302     // Determine if the add above introduces carry.
 303     hasCarry = (dest[i] < carry) ? 1 : 0;
 304     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
 305     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
 306     // (2^32 - 1) + 2^32 = 2^64.
 307     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 308
 309     carry += (lx * hy) & 0xffffffffULL;
 310     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
 311     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
 312             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
 313   }
 314   return carry;
 315 }
 316
 317 /// Multiplies integer array x by integer array y and stores the result into
 318 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
 319 /// @brief Generalized multiplicate of integer arrays.
 320 static void mul(uint64_t dest[], uint64_t x[], uint32_t xlen, uint64_t y[],
 321                 uint32_t ylen) {
 322   dest[xlen] = mul_1(dest, x, xlen, y[0]);
 323   for (uint32_t i = 1; i < ylen; ++i) {
 324     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
 325     uint64_t carry = 0, lx = 0, hx = 0;
 326     for (uint32_t j = 0; j < xlen; ++j) {
 327       lx = x[j] & 0xffffffffULL;
 328       hx = x[j] >> 32;
 329       // hasCarry - A flag to indicate if has carry.
 330       // hasCarry == 0, no carry
 331       // hasCarry == 1, has carry
 332       // hasCarry == 2, no carry and the calculation result == 0.
 333       uint8_t hasCarry = 0;
 334       uint64_t resul = carry + lx * ly;
 335       hasCarry = (resul < carry) ? 1 : 0;
 336       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
 337       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 338
 339       carry += (lx * hy) & 0xffffffffULL;
 340       resul = (carry << 32) | (resul & 0xffffffffULL);
 341       dest[i+j] += resul;
 342       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
 343               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
 344               ((lx * hy) >> 32) + hx * hy;
 345     }
 346     dest[i+xlen] = carry;
 347   }
 348 }
 349
 350 APInt& APInt::operator*=(const APInt& RHS) {
 351   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 352   if (isSingleWord()) {
 353     VAL *= RHS.VAL;
 354     clearUnusedBits();
 355     return *this;
 356   }
 357
 358   // Get some bit facts about LHS and check for zero
 359   uint32_t lhsBits = getActiveBits();
 360   uint32_t lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
 361   if (!lhsWords)
 362     // 0 * X ===> 0
 363     return *this;
 364
 365   // Get some bit facts about RHS and check for zero
 366   uint32_t rhsBits = RHS.getActiveBits();
 367   uint32_t rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
 368   if (!rhsWords) {
 369     // X * 0 ===> 0
 370     clear();
 371     return *this;
 372   }
 373
 374   // Allocate space for the result
 375   uint32_t destWords = rhsWords + lhsWords;
 376   uint64_t *dest = getMemory(destWords);
 377
 378   // Perform the long multiply
 379   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
 380
 381   // Copy result back into *this
 382   clear();
 383   uint32_t wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
 384   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
 385
 386   // delete dest array and return
 387   delete[] dest;
 388   return *this;
 389 }
 390
 391 APInt& APInt::operator&=(const APInt& RHS) {
 392   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 393   if (isSingleWord()) {
 394     VAL &= RHS.VAL;
 395     return *this;
 396   }
 397   uint32_t numWords = getNumWords();
 398   for (uint32_t i = 0; i < numWords; ++i)
 399     pVal[i] &= RHS.pVal[i];
 400   return *this;
 401 }
 402
 403 APInt& APInt::operator|=(const APInt& RHS) {
 404   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 405   if (isSingleWord()) {
 406     VAL |= RHS.VAL;
 407     return *this;
 408   }
 409   uint32_t numWords = getNumWords();
 410   for (uint32_t i = 0; i < numWords; ++i)
 411     pVal[i] |= RHS.pVal[i];
 412   return *this;
 413 }
 414
 415 APInt& APInt::operator^=(const APInt& RHS) {
 416   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 417   if (isSingleWord()) {
 418     VAL ^= RHS.VAL;
 419     this->clearUnusedBits();
 420     return *this;
 421   }
 422   uint32_t numWords = getNumWords();
 423   for (uint32_t i = 0; i < numWords; ++i)
 424     pVal[i] ^= RHS.pVal[i];
 425   return clearUnusedBits();
 426 }
 427
 428 APInt APInt::operator&(const APInt& RHS) const {
 429   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 430   if (isSingleWord())
 431     return APInt(getBitWidth(), VAL & RHS.VAL);
 432
 433   uint32_t numWords = getNumWords();
 434   uint64_t* val = getMemory(numWords);
 435   for (uint32_t i = 0; i < numWords; ++i)
 436     val[i] = pVal[i] & RHS.pVal[i];
 437   return APInt(val, getBitWidth());
 438 }
 439
 440 APInt APInt::operator|(const APInt& RHS) const {
 441   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 442   if (isSingleWord())
 443     return APInt(getBitWidth(), VAL | RHS.VAL);
 444
 445   uint32_t numWords = getNumWords();
 446   uint64_t *val = getMemory(numWords);
 447   for (uint32_t i = 0; i < numWords; ++i)
 448     val[i] = pVal[i] | RHS.pVal[i];
 449   return APInt(val, getBitWidth());
 450 }
 451
 452 APInt APInt::operator^(const APInt& RHS) const {
 453   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 454   if (isSingleWord())
 455     return APInt(BitWidth, VAL ^ RHS.VAL);
 456
 457   uint32_t numWords = getNumWords();
 458   uint64_t *val = getMemory(numWords);
 459   for (uint32_t i = 0; i < numWords; ++i)
 460     val[i] = pVal[i] ^ RHS.pVal[i];
 461
 462   // 0^0==1 so clear the high bits in case they got set.
 463   return APInt(val, getBitWidth()).clearUnusedBits();
 464 }
 465
 466 bool APInt::operator !() const {
 467   if (isSingleWord())
 468     return !VAL;
 469
 470   for (uint32_t i = 0; i < getNumWords(); ++i)
 471     if (pVal[i])
 472       return false;
 473   return true;
 474 }
 475
 476 APInt APInt::operator*(const APInt& RHS) const {
 477   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 478   if (isSingleWord())
 479     return APInt(BitWidth, VAL * RHS.VAL);
 480   APInt Result(*this);
 481   Result *= RHS;
 482   return Result.clearUnusedBits();
 483 }
 484
 485 APInt APInt::operator+(const APInt& RHS) const {
 486   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 487   if (isSingleWord())
 488     return APInt(BitWidth, VAL + RHS.VAL);
 489   APInt Result(BitWidth, 0);
 490   add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 491   return Result.clearUnusedBits();
 492 }
 493
 494 APInt APInt::operator-(const APInt& RHS) const {
 495   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 496   if (isSingleWord())
 497     return APInt(BitWidth, VAL - RHS.VAL);
 498   APInt Result(BitWidth, 0);
 499   sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 500   return Result.clearUnusedBits();
 501 }
 502
 503 bool APInt::operator[](uint32_t bitPosition) const {
 504   return (maskBit(bitPosition) &
 505           (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
 506 }
 507
 508 bool APInt::operator==(const APInt& RHS) const {
 509   assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
 510   if (isSingleWord())
 511     return VAL == RHS.VAL;
 512
 513   // Get some facts about the number of bits used in the two operands.
 514   uint32_t n1 = getActiveBits();
 515   uint32_t n2 = RHS.getActiveBits();
 516
 517   // If the number of bits isn't the same, they aren't equal
 518   if (n1 != n2)
 519     return false;
 520
 521   // If the number of bits fits in a word, we only need to compare the low word.
 522   if (n1 <= APINT_BITS_PER_WORD)
 523     return pVal[0] == RHS.pVal[0];
 524
 525   // Otherwise, compare everything
 526   for (int i = whichWord(n1 - 1); i >= 0; --i)
 527     if (pVal[i] != RHS.pVal[i])
 528       return false;
 529   return true;
 530 }
 531
 532 bool APInt::operator==(uint64_t Val) const {
 533   if (isSingleWord())
 534     return VAL == Val;
 535
 536   uint32_t n = getActiveBits();
 537   if (n <= APINT_BITS_PER_WORD)
 538     return pVal[0] == Val;
 539   else
 540     return false;
 541 }
 542
 543 bool APInt::ult(const APInt& RHS) const {
 544   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 545   if (isSingleWord())
 546     return VAL < RHS.VAL;
 547
 548   // Get active bit length of both operands
 549   uint32_t n1 = getActiveBits();
 550   uint32_t n2 = RHS.getActiveBits();
 551
 552   // If magnitude of LHS is less than RHS, return true.
 553   if (n1 < n2)
 554     return true;
 555
 556   // If magnitude of RHS is greather than LHS, return false.
 557   if (n2 < n1)
 558     return false;
 559
 560   // If they bot fit in a word, just compare the low order word
 561   if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
 562     return pVal[0] < RHS.pVal[0];
 563
 564   // Otherwise, compare all words
 565   uint32_t topWord = whichWord(std::max(n1,n2)-1);
 566   for (int i = topWord; i >= 0; --i) {
 567     if (pVal[i] > RHS.pVal[i])
 568       return false;
 569     if (pVal[i] < RHS.pVal[i])
 570       return true;
 571   }
 572   return false;
 573 }
 574
 575 bool APInt::slt(const APInt& RHS) const {
 576   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 577   if (isSingleWord()) {
 578     int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
 579     int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
 580     return lhsSext < rhsSext;
 581   }
 582
 583   APInt lhs(*this);
 584   APInt rhs(RHS);
 585   bool lhsNeg = isNegative();
 586   bool rhsNeg = rhs.isNegative();
 587   if (lhsNeg) {
 588     // Sign bit is set so perform two's complement to make it positive
 589     lhs.flip();
 590     lhs++;
 591   }
 592   if (rhsNeg) {
 593     // Sign bit is set so perform two's complement to make it positive
 594     rhs.flip();
 595     rhs++;
 596   }
 597
 598   // Now we have unsigned values to compare so do the comparison if necessary
 599   // based on the negativeness of the values.
 600   if (lhsNeg)
 601     if (rhsNeg)
 602       return lhs.ugt(rhs);
 603     else
 604       return true;
 605   else if (rhsNeg)
 606     return false;
 607   else
 608     return lhs.ult(rhs);
 609 }
 610
 611 APInt& APInt::set(uint32_t bitPosition) {
 612   if (isSingleWord())
 613     VAL |= maskBit(bitPosition);
 614   else
 615     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
 616   return *this;
 617 }
 618
 619 APInt& APInt::set() {
 620   if (isSingleWord()) {
 621     VAL = -1ULL;
 622     return clearUnusedBits();
 623   }
 624
 625   // Set all the bits in all the words.
 626   for (uint32_t i = 0; i < getNumWords(); ++i)
 627     pVal[i] = -1ULL;
 628   // Clear the unused ones
 629   return clearUnusedBits();
 630 }
 631
 632 /// Set the given bit to 0 whose position is given as "bitPosition".
 633 /// @brief Set a given bit to 0.
 634 APInt& APInt::clear(uint32_t bitPosition) {
 635   if (isSingleWord())
 636     VAL &= ~maskBit(bitPosition);
 637   else
 638     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
 639   return *this;
 640 }
 641
 642 /// @brief Set every bit to 0.
 643 APInt& APInt::clear() {
 644   if (isSingleWord())
 645     VAL = 0;
 646   else
 647     memset(pVal, 0, getNumWords() * APINT_WORD_SIZE);
 648   return *this;
 649 }
 650
 651 /// @brief Bitwise NOT operator. Performs a bitwise logical NOT operation on
 652 /// this APInt.
 653 APInt APInt::operator~() const {
 654   APInt Result(*this);
 655   Result.flip();
 656   return Result;
 657 }
 658
 659 /// @brief Toggle every bit to its opposite value.
 660 APInt& APInt::flip() {
 661   if (isSingleWord()) {
 662     VAL ^= -1ULL;
 663     return clearUnusedBits();
 664   }
 665   for (uint32_t i = 0; i < getNumWords(); ++i)
 666     pVal[i] ^= -1ULL;
 667   return clearUnusedBits();
 668 }
 669
 670 /// Toggle a given bit to its opposite value whose position is given
 671 /// as "bitPosition".
 672 /// @brief Toggles a given bit to its opposite value.
 673 APInt& APInt::flip(uint32_t bitPosition) {
 674   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 675   if ((*this)[bitPosition]) clear(bitPosition);
 676   else set(bitPosition);
 677   return *this;
 678 }
 679
 680 uint32_t APInt::getBitsNeeded(const char* str, uint32_t slen, uint8_t radix) {
 681   assert(str != 0 && "Invalid value string");
 682   assert(slen > 0 && "Invalid string length");
 683
 684   // Each computation below needs to know if its negative
 685   uint32_t isNegative = str[0] == '-';
 686   if (isNegative) {
 687     slen--;
 688     str++;
 689   }
 690   // For radixes of power-of-two values, the bits required is accurately and
 691   // easily computed
 692   if (radix == 2)
 693     return slen + isNegative;
 694   if (radix == 8)
 695     return slen * 3 + isNegative;
 696   if (radix == 16)
 697     return slen * 4 + isNegative;
 698
 699   // Otherwise it must be radix == 10, the hard case
 700   assert(radix == 10 && "Invalid radix");
 701
 702   // This is grossly inefficient but accurate. We could probably do something
 703   // with a computation of roughly slen*64/20 and then adjust by the value of
 704   // the first few digits. But, I'm not sure how accurate that could be.
 705
 706   // Compute a sufficient number of bits that is always large enough but might
 707   // be too large. This avoids the assertion in the constructor.
 708   uint32_t sufficient = slen*64/18;
 709
 710   // Convert to the actual binary value.
 711   APInt tmp(sufficient, str, slen, radix);
 712
 713   // Compute how many bits are required.
 714   return isNegative + tmp.logBase2() + 1;
 715 }
 716
 717 uint64_t APInt::getHashValue() const {
 718   // Put the bit width into the low order bits.
 719   uint64_t hash = BitWidth;
 720
 721   // Add the sum of the words to the hash.
 722   if (isSingleWord())
 723     hash += VAL << 6; // clear separation of up to 64 bits
 724   else
 725     for (uint32_t i = 0; i < getNumWords(); ++i)
 726       hash += pVal[i] << 6; // clear sepration of up to 64 bits
 727   return hash;
 728 }
 729
 730 /// HiBits - This function returns the high "numBits" bits of this APInt.
 731 APInt APInt::getHiBits(uint32_t numBits) const {
 732   return APIntOps::lshr(*this, BitWidth - numBits);
 733 }
 734
 735 /// LoBits - This function returns the low "numBits" bits of this APInt.
 736 APInt APInt::getLoBits(uint32_t numBits) const {
 737   return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
 738                         BitWidth - numBits);
 739 }
 740
 741 bool APInt::isPowerOf2() const {
 742   return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
 743 }
 744
 745 uint32_t APInt::countLeadingZeros() const {
 746   uint32_t Count = 0;
 747   if (isSingleWord())
 748     Count = CountLeadingZeros_64(VAL);
 749   else {
 750     for (uint32_t i = getNumWords(); i > 0u; --i) {
 751       if (pVal[i-1] == 0)
 752         Count += APINT_BITS_PER_WORD;
 753       else {
 754         Count += CountLeadingZeros_64(pVal[i-1]);
 755         break;
 756       }
 757     }
 758   }
 759   uint32_t remainder = BitWidth % APINT_BITS_PER_WORD;
 760   if (remainder)
 761     Count -= APINT_BITS_PER_WORD - remainder;
 762   return std::min(Count, BitWidth);
 763 }
 764
 765 static uint32_t countLeadingOnes_64(uint64_t V, uint32_t skip) {
 766   uint32_t Count = 0;
 767   if (skip)
 768     V <<= skip;
 769   while (V && (V & (1ULL << 63))) {
 770     Count++;
 771     V <<= 1;
 772   }
 773   return Count;
 774 }
 775
 776 uint32_t APInt::countLeadingOnes() const {
 777   if (isSingleWord())
 778     return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
 779
 780   uint32_t highWordBits = BitWidth % APINT_BITS_PER_WORD;
 781   uint32_t shift = (highWordBits == 0 ? 0 : APINT_BITS_PER_WORD - highWordBits);
 782   int i = getNumWords() - 1;
 783   uint32_t Count = countLeadingOnes_64(pVal[i], shift);
 784   if (Count == highWordBits) {
 785     for (i--; i >= 0; --i) {
 786       if (pVal[i] == -1ULL)
 787         Count += APINT_BITS_PER_WORD;
 788       else {
 789         Count += countLeadingOnes_64(pVal[i], 0);
 790         break;
 791       }
 792     }
 793   }
 794   return Count;
 795 }
 796
 797 uint32_t APInt::countTrailingZeros() const {
 798   if (isSingleWord())
 799     return std::min(uint32_t(CountTrailingZeros_64(VAL)), BitWidth);
 800   uint32_t Count = 0;
 801   uint32_t i = 0;
 802   for (; i < getNumWords() && pVal[i] == 0; ++i)
 803     Count += APINT_BITS_PER_WORD;
 804   if (i < getNumWords())
 805     Count += CountTrailingZeros_64(pVal[i]);
 806   return std::min(Count, BitWidth);
 807 }
 808
 809 uint32_t APInt::countTrailingOnes() const {
 810   if (isSingleWord())
 811     return std::min(uint32_t(CountTrailingOnes_64(VAL)), BitWidth);
 812   uint32_t Count = 0;
 813   uint32_t i = 0;
 814   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
 815     Count += APINT_BITS_PER_WORD;
 816   if (i < getNumWords())
 817     Count += CountTrailingOnes_64(pVal[i]);
 818   return std::min(Count, BitWidth);
 819 }
 820
 821 uint32_t APInt::countPopulation() const {
 822   if (isSingleWord())
 823     return CountPopulation_64(VAL);
 824   uint32_t Count = 0;
 825   for (uint32_t i = 0; i < getNumWords(); ++i)
 826     Count += CountPopulation_64(pVal[i]);
 827   return Count;
 828 }
 829
 830 APInt APInt::byteSwap() const {
 831   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 832   if (BitWidth == 16)
 833     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
 834   else if (BitWidth == 32)
 835     return APInt(BitWidth, ByteSwap_32(uint32_t(VAL)));
 836   else if (BitWidth == 48) {
 837     uint32_t Tmp1 = uint32_t(VAL >> 16);
 838     Tmp1 = ByteSwap_32(Tmp1);
 839     uint16_t Tmp2 = uint16_t(VAL);
 840     Tmp2 = ByteSwap_16(Tmp2);
 841     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 842   } else if (BitWidth == 64)
 843     return APInt(BitWidth, ByteSwap_64(VAL));
 844   else {
 845     APInt Result(BitWidth, 0);
 846     char *pByte = (char*)Result.pVal;
 847     for (uint32_t i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
 848       char Tmp = pByte[i];
 849       pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
 850       pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
 851     }
 852     return Result;
 853   }
 854 }
 855
 856 APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
 857                                             const APInt& API2) {
 858   APInt A = API1, B = API2;
 859   while (!!B) {
 860     APInt T = B;
 861     B = APIntOps::urem(A, B);
 862     A = T;
 863   }
 864   return A;
 865 }
 866
 867 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, uint32_t width) {
 868   union {
 869     double D;
 870     uint64_t I;
 871   } T;
 872   T.D = Double;
 873
 874   // Get the sign bit from the highest order bit
 875   bool isNeg = T.I >> 63;
 876
 877   // Get the 11-bit exponent and adjust for the 1023 bit bias
 878   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
 879
 880   // If the exponent is negative, the value is < 0 so just return 0.
 881   if (exp < 0)
 882     return APInt(width, 0u);
 883
 884   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 885   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
 886
 887   // If the exponent doesn't shift all bits out of the mantissa
 888   if (exp < 52)
 889     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 890                     APInt(width, mantissa >> (52 - exp));
 891
 892   // If the client didn't provide enough bits for us to shift the mantissa into
 893   // then the result is undefined, just return 0
 894   if (width <= exp - 52)
 895     return APInt(width, 0);
 896
 897   // Otherwise, we have to shift the mantissa bits up to the right location
 898   APInt Tmp(width, mantissa);
 899   Tmp = Tmp.shl((uint32_t)exp - 52);
 900   return isNeg ? -Tmp : Tmp;
 901 }
 902
 903 /// RoundToDouble - This function convert this APInt to a double.
 904 /// The layout for double is as following (IEEE Standard 754):
 905 ///  --------------------------------------
 906 /// |  Sign    Exponent    Fraction    Bias |
 907 /// |-------------------------------------- |
 908 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 909 ///  --------------------------------------
 910 double APInt::roundToDouble(bool isSigned) const {
 911
 912   // Handle the simple case where the value is contained in one uint64_t.
 913   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 914     if (isSigned) {
 915       int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
 916       return double(sext);
 917     } else
 918       return double(VAL);
 919   }
 920
 921   // Determine if the value is negative.
 922   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 923
 924   // Construct the absolute value if we're negative.
 925   APInt Tmp(isNeg ? -(*this) : (*this));
 926
 927   // Figure out how many bits we're using.
 928   uint32_t n = Tmp.getActiveBits();
 929
 930   // The exponent (without bias normalization) is just the number of bits
 931   // we are using. Note that the sign bit is gone since we constructed the
 932   // absolute value.
 933   uint64_t exp = n;
 934
 935   // Return infinity for exponent overflow
 936   if (exp > 1023) {
 937     if (!isSigned || !isNeg)
 938       return std::numeric_limits<double>::infinity();
 939     else
 940       return -std::numeric_limits<double>::infinity();
 941   }
 942   exp += 1023; // Increment for 1023 bias
 943
 944   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 945   // extract the high 52 bits from the correct words in pVal.
 946   uint64_t mantissa;
 947   unsigned hiWord = whichWord(n-1);
 948   if (hiWord == 0) {
 949     mantissa = Tmp.pVal[0];
 950     if (n > 52)
 951       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 952   } else {
 953     assert(hiWord > 0 && "huh?");
 954     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 955     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 956     mantissa = hibits | lobits;
 957   }
 958
 959   // The leading bit of mantissa is implicit, so get rid of it.
 960   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 961   union {
 962     double D;
 963     uint64_t I;
 964   } T;
 965   T.I = sign | (exp << 52) | mantissa;
 966   return T.D;
 967 }
 968
 969 // Truncate to new width.
 970 APInt &APInt::trunc(uint32_t width) {
 971   assert(width < BitWidth && "Invalid APInt Truncate request");
 972   assert(width >= MIN_INT_BITS && "Can't truncate to 0 bits");
 973   uint32_t wordsBefore = getNumWords();
 974   BitWidth = width;
 975   uint32_t wordsAfter = getNumWords();
 976   if (wordsBefore != wordsAfter) {
 977     if (wordsAfter == 1) {
 978       uint64_t *tmp = pVal;
 979       VAL = pVal[0];
 980       delete [] tmp;
 981     } else {
 982       uint64_t *newVal = getClearedMemory(wordsAfter);
 983       for (uint32_t i = 0; i < wordsAfter; ++i)
 984         newVal[i] = pVal[i];
 985       delete [] pVal;
 986       pVal = newVal;
 987     }
 988   }
 989   return clearUnusedBits();
 990 }
 991
 992 // Sign extend to a new width.
 993 APInt &APInt::sext(uint32_t width) {
 994   assert(width > BitWidth && "Invalid APInt SignExtend request");
 995   assert(width <= MAX_INT_BITS && "Too many bits");
 996   // If the sign bit isn't set, this is the same as zext.
 997   if (!isNegative()) {
 998     zext(width);
 999     return *this;
1000   }
1001
1002   // The sign bit is set. First, get some facts
1003   uint32_t wordsBefore = getNumWords();
1004   uint32_t wordBits = BitWidth % APINT_BITS_PER_WORD;
1005   BitWidth = width;
1006   uint32_t wordsAfter = getNumWords();
1007
1008   // Mask the high order word appropriately
1009   if (wordsBefore == wordsAfter) {
1010     uint32_t newWordBits = width % APINT_BITS_PER_WORD;
1011     // The extension is contained to the wordsBefore-1th word.
1012     uint64_t mask = ~0ULL;
1013     if (newWordBits)
1014       mask >>= APINT_BITS_PER_WORD - newWordBits;
1015     mask <<= wordBits;
1016     if (wordsBefore == 1)
1017       VAL |= mask;
1018     else
1019       pVal[wordsBefore-1] |= mask;
1020     return clearUnusedBits();
1021   }
1022
1023   uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1024   uint64_t *newVal = getMemory(wordsAfter);
1025   if (wordsBefore == 1)
1026     newVal[0] = VAL | mask;
1027   else {
1028     for (uint32_t i = 0; i < wordsBefore; ++i)
1029       newVal[i] = pVal[i];
1030     newVal[wordsBefore-1] |= mask;
1031   }
1032   for (uint32_t i = wordsBefore; i < wordsAfter; i++)
1033     newVal[i] = -1ULL;
1034   if (wordsBefore != 1)
1035     delete [] pVal;
1036   pVal = newVal;
1037   return clearUnusedBits();
1038 }
1039
1040 //  Zero extend to a new width.
1041 APInt &APInt::zext(uint32_t width) {
1042   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1043   assert(width <= MAX_INT_BITS && "Too many bits");
1044   uint32_t wordsBefore = getNumWords();
1045   BitWidth = width;
1046   uint32_t wordsAfter = getNumWords();
1047   if (wordsBefore != wordsAfter) {
1048     uint64_t *newVal = getClearedMemory(wordsAfter);
1049     if (wordsBefore == 1)
1050       newVal[0] = VAL;
1051     else
1052       for (uint32_t i = 0; i < wordsBefore; ++i)
1053         newVal[i] = pVal[i];
1054     if (wordsBefore != 1)
1055       delete [] pVal;
1056     pVal = newVal;
1057   }
1058   return *this;
1059 }
1060
1061 APInt &APInt::zextOrTrunc(uint32_t width) {
1062   if (BitWidth < width)
1063     return zext(width);
1064   if (BitWidth > width)
1065     return trunc(width);
1066   return *this;
1067 }
1068
1069 APInt &APInt::sextOrTrunc(uint32_t width) {
1070   if (BitWidth < width)
1071     return sext(width);
1072   if (BitWidth > width)
1073     return trunc(width);
1074   return *this;
1075 }
1076
1077 /// Arithmetic right-shift this APInt by shiftAmt.
1078 /// @brief Arithmetic right-shift function.
1079 APInt APInt::ashr(const APInt &shiftAmt) const {
1080   return ashr((uint32_t)shiftAmt.getLimitedValue(BitWidth));
1081 }
1082
1083 /// Arithmetic right-shift this APInt by shiftAmt.
1084 /// @brief Arithmetic right-shift function.
1085 APInt APInt::ashr(uint32_t shiftAmt) const {
1086   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1087   // Handle a degenerate case
1088   if (shiftAmt == 0)
1089     return *this;
1090
1091   // Handle single word shifts with built-in ashr
1092   if (isSingleWord()) {
1093     if (shiftAmt == BitWidth)
1094       return APInt(BitWidth, 0); // undefined
1095     else {
1096       uint32_t SignBit = APINT_BITS_PER_WORD - BitWidth;
1097       return APInt(BitWidth,
1098         (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1099     }
1100   }
1101
1102   // If all the bits were shifted out, the result is, technically, undefined.
1103   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1104   // issues in the algorithm below.
1105   if (shiftAmt == BitWidth) {
1106     if (isNegative())
1107       return APInt(BitWidth, -1ULL, true);
1108     else
1109       return APInt(BitWidth, 0);
1110   }
1111
1112   // Create some space for the result.
1113   uint64_t * val = new uint64_t[getNumWords()];
1114
1115   // Compute some values needed by the following shift algorithms
1116   uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1117   uint32_t offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1118   uint32_t breakWord = getNumWords() - 1 - offset; // last word affected
1119   uint32_t bitsInWord = whichBit(BitWidth); // how many bits in last word?
1120   if (bitsInWord == 0)
1121     bitsInWord = APINT_BITS_PER_WORD;
1122
1123   // If we are shifting whole words, just move whole words
1124   if (wordShift == 0) {
1125     // Move the words containing significant bits
1126     for (uint32_t i = 0; i <= breakWord; ++i)
1127       val[i] = pVal[i+offset]; // move whole word
1128
1129     // Adjust the top significant word for sign bit fill, if negative
1130     if (isNegative())
1131       if (bitsInWord < APINT_BITS_PER_WORD)
1132         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1133   } else {
1134     // Shift the low order words
1135     for (uint32_t i = 0; i < breakWord; ++i) {
1136       // This combines the shifted corresponding word with the low bits from
1137       // the next word (shifted into this word's high bits).
1138       val[i] = (pVal[i+offset] >> wordShift) |
1139                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1140     }
1141
1142     // Shift the break word. In this case there are no bits from the next word
1143     // to include in this word.
1144     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1145
1146     // Deal with sign extenstion in the break word, and possibly the word before
1147     // it.
1148     if (isNegative()) {
1149       if (wordShift > bitsInWord) {
1150         if (breakWord > 0)
1151           val[breakWord-1] |=
1152             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1153         val[breakWord] |= ~0ULL;
1154       } else
1155         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1156     }
1157   }
1158
1159   // Remaining words are 0 or -1, just assign them.
1160   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1161   for (uint32_t i = breakWord+1; i < getNumWords(); ++i)
1162     val[i] = fillValue;
1163   return APInt(val, BitWidth).clearUnusedBits();
1164 }
1165
1166 /// Logical right-shift this APInt by shiftAmt.
1167 /// @brief Logical right-shift function.
1168 APInt APInt::lshr(const APInt &shiftAmt) const {
1169   return lshr((uint32_t)shiftAmt.getLimitedValue(BitWidth));
1170 }
1171
1172 /// Logical right-shift this APInt by shiftAmt.
1173 /// @brief Logical right-shift function.
1174 APInt APInt::lshr(uint32_t shiftAmt) const {
1175   if (isSingleWord()) {
1176     if (shiftAmt == BitWidth)
1177       return APInt(BitWidth, 0);
1178     else
1179       return APInt(BitWidth, this->VAL >> shiftAmt);
1180   }
1181
1182   // If all the bits were shifted out, the result is 0. This avoids issues
1183   // with shifting by the size of the integer type, which produces undefined
1184   // results. We define these "undefined results" to always be 0.
1185   if (shiftAmt == BitWidth)
1186     return APInt(BitWidth, 0);
1187
1188   // If none of the bits are shifted out, the result is *this. This avoids
1189   // issues with shifting byt he size of the integer type, which produces
1190   // undefined results in the code below. This is also an optimization.
1191   if (shiftAmt == 0)
1192     return *this;
1193
1194   // Create some space for the result.
1195   uint64_t * val = new uint64_t[getNumWords()];
1196
1197   // If we are shifting less than a word, compute the shift with a simple carry
1198   if (shiftAmt < APINT_BITS_PER_WORD) {
1199     uint64_t carry = 0;
1200     for (int i = getNumWords()-1; i >= 0; --i) {
1201       val[i] = (pVal[i] >> shiftAmt) | carry;
1202       carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1203     }
1204     return APInt(val, BitWidth).clearUnusedBits();
1205   }
1206
1207   // Compute some values needed by the remaining shift algorithms
1208   uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD;
1209   uint32_t offset = shiftAmt / APINT_BITS_PER_WORD;
1210
1211   // If we are shifting whole words, just move whole words
1212   if (wordShift == 0) {
1213     for (uint32_t i = 0; i < getNumWords() - offset; ++i)
1214       val[i] = pVal[i+offset];
1215     for (uint32_t i = getNumWords()-offset; i < getNumWords(); i++)
1216       val[i] = 0;
1217     return APInt(val,BitWidth).clearUnusedBits();
1218   }
1219
1220   // Shift the low order words
1221   uint32_t breakWord = getNumWords() - offset -1;
1222   for (uint32_t i = 0; i < breakWord; ++i)
1223     val[i] = (pVal[i+offset] >> wordShift) |
1224              (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1225   // Shift the break word.
1226   val[breakWord] = pVal[breakWord+offset] >> wordShift;
1227
1228   // Remaining words are 0
1229   for (uint32_t i = breakWord+1; i < getNumWords(); ++i)
1230     val[i] = 0;
1231   return APInt(val, BitWidth).clearUnusedBits();
1232 }
1233
1234 /// Left-shift this APInt by shiftAmt.
1235 /// @brief Left-shift function.
1236 APInt APInt::shl(const APInt &shiftAmt) const {
1237   // It's undefined behavior in C to shift by BitWidth or greater, but
1238   return shl((uint32_t)shiftAmt.getLimitedValue(BitWidth));
1239 }
1240
1241 /// Left-shift this APInt by shiftAmt.
1242 /// @brief Left-shift function.
1243 APInt APInt::shl(uint32_t shiftAmt) const {
1244   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1245   if (isSingleWord()) {
1246     if (shiftAmt == BitWidth)
1247       return APInt(BitWidth, 0); // avoid undefined shift results
1248     return APInt(BitWidth, VAL << shiftAmt);
1249   }
1250
1251   // If all the bits were shifted out, the result is 0. This avoids issues
1252   // with shifting by the size of the integer type, which produces undefined
1253   // results. We define these "undefined results" to always be 0.
1254   if (shiftAmt == BitWidth)
1255     return APInt(BitWidth, 0);
1256
1257   // If none of the bits are shifted out, the result is *this. This avoids a
1258   // lshr by the words size in the loop below which can produce incorrect
1259   // results. It also avoids the expensive computation below for a common case.
1260   if (shiftAmt == 0)
1261     return *this;
1262
1263   // Create some space for the result.
1264   uint64_t * val = new uint64_t[getNumWords()];
1265
1266   // If we are shifting less than a word, do it the easy way
1267   if (shiftAmt < APINT_BITS_PER_WORD) {
1268     uint64_t carry = 0;
1269     for (uint32_t i = 0; i < getNumWords(); i++) {
1270       val[i] = pVal[i] << shiftAmt | carry;
1271       carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1272     }
1273     return APInt(val, BitWidth).clearUnusedBits();
1274   }
1275
1276   // Compute some values needed by the remaining shift algorithms
1277   uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD;
1278   uint32_t offset = shiftAmt / APINT_BITS_PER_WORD;
1279
1280   // If we are shifting whole words, just move whole words
1281   if (wordShift == 0) {
1282     for (uint32_t i = 0; i < offset; i++)
1283       val[i] = 0;
1284     for (uint32_t i = offset; i < getNumWords(); i++)
1285       val[i] = pVal[i-offset];
1286     return APInt(val,BitWidth).clearUnusedBits();
1287   }
1288
1289   // Copy whole words from this to Result.
1290   uint32_t i = getNumWords() - 1;
1291   for (; i > offset; --i)
1292     val[i] = pVal[i-offset] << wordShift |
1293              pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1294   val[offset] = pVal[0] << wordShift;
1295   for (i = 0; i < offset; ++i)
1296     val[i] = 0;
1297   return APInt(val, BitWidth).clearUnusedBits();
1298 }
1299
1300 APInt APInt::rotl(const APInt &rotateAmt) const {
1301   return rotl((uint32_t)rotateAmt.getLimitedValue(BitWidth));
1302 }
1303
1304 APInt APInt::rotl(uint32_t rotateAmt) const {
1305   if (rotateAmt == 0)
1306     return *this;
1307   // Don't get too fancy, just use existing shift/or facilities
1308   APInt hi(*this);
1309   APInt lo(*this);
1310   hi.shl(rotateAmt);
1311   lo.lshr(BitWidth - rotateAmt);
1312   return hi | lo;
1313 }
1314
1315 APInt APInt::rotr(const APInt &rotateAmt) const {
1316   return rotr((uint32_t)rotateAmt.getLimitedValue(BitWidth));
1317 }
1318
1319 APInt APInt::rotr(uint32_t rotateAmt) const {
1320   if (rotateAmt == 0)
1321     return *this;
1322   // Don't get too fancy, just use existing shift/or facilities
1323   APInt hi(*this);
1324   APInt lo(*this);
1325   lo.lshr(rotateAmt);
1326   hi.shl(BitWidth - rotateAmt);
1327   return hi | lo;
1328 }
1329
1330 // Square Root - this method computes and returns the square root of "this".
1331 // Three mechanisms are used for computation. For small values (<= 5 bits),
1332 // a table lookup is done. This gets some performance for common cases. For
1333 // values using less than 52 bits, the value is converted to double and then
1334 // the libc sqrt function is called. The result is rounded and then converted
1335 // back to a uint64_t which is then used to construct the result. Finally,
1336 // the Babylonian method for computing square roots is used.
1337 APInt APInt::sqrt() const {
1338
1339   // Determine the magnitude of the value.
1340   uint32_t magnitude = getActiveBits();
1341
1342   // Use a fast table for some small values. This also gets rid of some
1343   // rounding errors in libc sqrt for small values.
1344   if (magnitude <= 5) {
1345     static const uint8_t results[32] = {
1346       /*     0 */ 0,
1347       /*  1- 2 */ 1, 1,
1348       /*  3- 6 */ 2, 2, 2, 2,
1349       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1350       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1351       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1352       /*    31 */ 6
1353     };
1354     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1355   }
1356
1357   // If the magnitude of the value fits in less than 52 bits (the precision of
1358   // an IEEE double precision floating point value), then we can use the
1359   // libc sqrt function which will probably use a hardware sqrt computation.
1360   // This should be faster than the algorithm below.
1361   if (magnitude < 52) {
1362 #ifdef _MSC_VER
1363     // Amazingly, VC++ doesn't have round().
1364     return APInt(BitWidth,
1365                  uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1366 #else
1367     return APInt(BitWidth,
1368                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1369 #endif
1370   }
1371
1372   // Okay, all the short cuts are exhausted. We must compute it. The following
1373   // is a classical Babylonian method for computing the square root. This code
1374   // was adapted to APINt from a wikipedia article on such computations.
1375   // See http://www.wikipedia.org/ and go to the page named
1376   // Calculate_an_integer_square_root.
1377   uint32_t nbits = BitWidth, i = 4;
1378   APInt testy(BitWidth, 16);
1379   APInt x_old(BitWidth, 1);
1380   APInt x_new(BitWidth, 0);
1381   APInt two(BitWidth, 2);
1382
1383   // Select a good starting value using binary logarithms.
1384   for (;; i += 2, testy = testy.shl(2))
1385     if (i >= nbits || this->ule(testy)) {
1386       x_old = x_old.shl(i / 2);
1387       break;
1388     }
1389
1390   // Use the Babylonian method to arrive at the integer square root:
1391   for (;;) {
1392     x_new = (this->udiv(x_old) + x_old).udiv(two);
1393     if (x_old.ule(x_new))
1394       break;
1395     x_old = x_new;
1396   }
1397
1398   // Make sure we return the closest approximation
1399   // NOTE: The rounding calculation below is correct. It will produce an
1400   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1401   // determined to be a rounding issue with pari/gp as it begins to use a
1402   // floating point representation after 192 bits. There are no discrepancies
1403   // between this algorithm and pari/gp for bit widths < 192 bits.
1404   APInt square(x_old * x_old);
1405   APInt nextSquare((x_old + 1) * (x_old +1));
1406   if (this->ult(square))
1407     return x_old;
1408   else if (this->ule(nextSquare)) {
1409     APInt midpoint((nextSquare - square).udiv(two));
1410     APInt offset(*this - square);
1411     if (offset.ult(midpoint))
1412       return x_old;
1413     else
1414       return x_old + 1;
1415   } else
1416     assert(0 && "Error in APInt::sqrt computation");
1417   return x_old + 1;
1418 }
1419
1420 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1421 /// iterative extended Euclidean algorithm is used to solve for this value,
1422 /// however we simplify it to speed up calculating only the inverse, and take
1423 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1424 /// (potentially large) APInts around.
1425 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1426   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1427
1428   // Using the properties listed at the following web page (accessed 06/21/08):
1429   //   http://www.numbertheory.org/php/euclid.html
1430   // (especially the properties numbered 3, 4 and 9) it can be proved that
1431   // BitWidth bits suffice for all the computations in the algorithm implemented
1432   // below. More precisely, this number of bits suffice if the multiplicative
1433   // inverse exists, but may not suffice for the general extended Euclidean
1434   // algorithm.
1435
1436   APInt r[2] = { modulo, *this };
1437   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1438   APInt q(BitWidth, 0);
1439
1440   unsigned i;
1441   for (i = 0; r[i^1] != 0; i ^= 1) {
1442     // An overview of the math without the confusing bit-flipping:
1443     // q = r[i-2] / r[i-1]
1444     // r[i] = r[i-2] % r[i-1]
1445     // t[i] = t[i-2] - t[i-1] * q
1446     udivrem(r[i], r[i^1], q, r[i]);
1447     t[i] -= t[i^1] * q;
1448   }
1449
1450   // If this APInt and the modulo are not coprime, there is no multiplicative
1451   // inverse, so return 0. We check this by looking at the next-to-last
1452   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1453   // algorithm.
1454   if (r[i] != 1)
1455     return APInt(BitWidth, 0);
1456
1457   // The next-to-last t is the multiplicative inverse.  However, we are
1458   // interested in a positive inverse. Calcuate a positive one from a negative
1459   // one if necessary. A simple addition of the modulo suffices because
1460   // abs(t[i]) is known to be less than *this/2 (see the link above).
1461   return t[i].isNegative() ? t[i] + modulo : t[i];
1462 }
1463
1464 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1465 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1466 /// variables here have the same names as in the algorithm. Comments explain
1467 /// the algorithm and any deviation from it.
1468 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1469                      uint32_t m, uint32_t n) {
1470   assert(u && "Must provide dividend");
1471   assert(v && "Must provide divisor");
1472   assert(q && "Must provide quotient");
1473   assert(u != v && u != q && v != q && "Must us different memory");
1474   assert(n>1 && "n must be > 1");
1475
1476   // Knuth uses the value b as the base of the number system. In our case b
1477   // is 2^31 so we just set it to -1u.
1478   uint64_t b = uint64_t(1) << 32;
1479
1480 #if 0
1481   DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n');
1482   DEBUG(cerr << "KnuthDiv: original:");
1483   DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1484   DEBUG(cerr << " by");
1485   DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1486   DEBUG(cerr << '\n');
1487 #endif
1488   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1489   // u and v by d. Note that we have taken Knuth's advice here to use a power
1490   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1491   // 2 allows us to shift instead of multiply and it is easy to determine the
1492   // shift amount from the leading zeros.  We are basically normalizing the u
1493   // and v so that its high bits are shifted to the top of v's range without
1494   // overflow. Note that this can require an extra word in u so that u must
1495   // be of length m+n+1.
1496   uint32_t shift = CountLeadingZeros_32(v[n-1]);
1497   uint32_t v_carry = 0;
1498   uint32_t u_carry = 0;
1499   if (shift) {
1500     for (uint32_t i = 0; i < m+n; ++i) {
1501       uint32_t u_tmp = u[i] >> (32 - shift);
1502       u[i] = (u[i] << shift) | u_carry;
1503       u_carry = u_tmp;
1504     }
1505     for (uint32_t i = 0; i < n; ++i) {
1506       uint32_t v_tmp = v[i] >> (32 - shift);
1507       v[i] = (v[i] << shift) | v_carry;
1508       v_carry = v_tmp;
1509     }
1510   }
1511   u[m+n] = u_carry;
1512 #if 0
1513   DEBUG(cerr << "KnuthDiv:   normal:");
1514   DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1515   DEBUG(cerr << " by");
1516   DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1517   DEBUG(cerr << '\n');
1518 #endif
1519
1520   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1521   int j = m;
1522   do {
1523     DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n');
1524     // D3. [Calculate q'.].
1525     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1526     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1527     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1528     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1529     // on v[n-2] determines at high speed most of the cases in which the trial
1530     // value qp is one too large, and it eliminates all cases where qp is two
1531     // too large.
1532     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1533     DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n');
1534     uint64_t qp = dividend / v[n-1];
1535     uint64_t rp = dividend % v[n-1];
1536     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1537       qp--;
1538       rp += v[n-1];
1539       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1540         qp--;
1541     }
1542     DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1543
1544     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1545     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1546     // consists of a simple multiplication by a one-place number, combined with
1547     // a subtraction.
1548     bool isNeg = false;
1549     for (uint32_t i = 0; i < n; ++i) {
1550       uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1551       uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1552       bool borrow = subtrahend > u_tmp;
1553       DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp
1554                  << ", subtrahend == " << subtrahend
1555                  << ", borrow = " << borrow << '\n');
1556
1557       uint64_t result = u_tmp - subtrahend;
1558       uint32_t k = j + i;
1559       u[k++] = (uint32_t)(result & (b-1)); // subtract low word
1560       u[k++] = (uint32_t)(result >> 32);   // subtract high word
1561       while (borrow && k <= m+n) { // deal with borrow to the left
1562         borrow = u[k] == 0;
1563         u[k]--;
1564         k++;
1565       }
1566       isNeg |= borrow;
1567       DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1568                     u[j+i+1] << '\n');
1569     }
1570     DEBUG(cerr << "KnuthDiv: after subtraction:");
1571     DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1572     DEBUG(cerr << '\n');
1573     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1574     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1575     // true value plus b**(n+1), namely as the b's complement of
1576     // the true value, and a "borrow" to the left should be remembered.
1577     //
1578     if (isNeg) {
1579       bool carry = true;  // true because b's complement is "complement + 1"
1580       for (uint32_t i = 0; i <= m+n; ++i) {
1581         u[i] = ~u[i] + carry; // b's complement
1582         carry = carry && u[i] == 0;
1583       }
1584     }
1585     DEBUG(cerr << "KnuthDiv: after complement:");
1586     DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1587     DEBUG(cerr << '\n');
1588
1589     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1590     // negative, go to step D6; otherwise go on to step D7.
1591     q[j] = (uint32_t)qp;
1592     if (isNeg) {
1593       // D6. [Add back]. The probability that this step is necessary is very
1594       // small, on the order of only 2/b. Make sure that test data accounts for
1595       // this possibility. Decrease q[j] by 1
1596       q[j]--;
1597       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1598       // A carry will occur to the left of u[j+n], and it should be ignored
1599       // since it cancels with the borrow that occurred in D4.
1600       bool carry = false;
1601       for (uint32_t i = 0; i < n; i++) {
1602         uint32_t limit = std::min(u[j+i],v[i]);
1603         u[j+i] += v[i] + carry;
1604         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1605       }
1606       u[j+n] += carry;
1607     }
1608     DEBUG(cerr << "KnuthDiv: after correction:");
1609     DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]);
1610     DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n');
1611
1612   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1613   } while (--j >= 0);
1614
1615   DEBUG(cerr << "KnuthDiv: quotient:");
1616   DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]);
1617   DEBUG(cerr << '\n');
1618
1619   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1620   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1621   // compute the remainder (urem uses this).
1622   if (r) {
1623     // The value d is expressed by the "shift" value above since we avoided
1624     // multiplication by d by using a shift left. So, all we have to do is
1625     // shift right here. In order to mak
1626     if (shift) {
1627       uint32_t carry = 0;
1628       DEBUG(cerr << "KnuthDiv: remainder:");
1629       for (int i = n-1; i >= 0; i--) {
1630         r[i] = (u[i] >> shift) | carry;
1631         carry = u[i] << (32 - shift);
1632         DEBUG(cerr << " " << r[i]);
1633       }
1634     } else {
1635       for (int i = n-1; i >= 0; i--) {
1636         r[i] = u[i];
1637         DEBUG(cerr << " " << r[i]);
1638       }
1639     }
1640     DEBUG(cerr << '\n');
1641   }
1642 #if 0
1643   DEBUG(cerr << std::setbase(10) << '\n');
1644 #endif
1645 }
1646
1647 void APInt::divide(const APInt LHS, uint32_t lhsWords,
1648                    const APInt &RHS, uint32_t rhsWords,
1649                    APInt *Quotient, APInt *Remainder)
1650 {
1651   assert(lhsWords >= rhsWords && "Fractional result");
1652
1653   // First, compose the values into an array of 32-bit words instead of
1654   // 64-bit words. This is a necessity of both the "short division" algorithm
1655   // and the the Knuth "classical algorithm" which requires there to be native
1656   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1657   // can't use 64-bit operands here because we don't have native results of
1658   // 128-bits. Furthremore, casting the 64-bit values to 32-bit values won't
1659   // work on large-endian machines.
1660   uint64_t mask = ~0ull >> (sizeof(uint32_t)*8);
1661   uint32_t n = rhsWords * 2;
1662   uint32_t m = (lhsWords * 2) - n;
1663
1664   // Allocate space for the temporary values we need either on the stack, if
1665   // it will fit, or on the heap if it won't.
1666   uint32_t SPACE[128];
1667   uint32_t *U = 0;
1668   uint32_t *V = 0;
1669   uint32_t *Q = 0;
1670   uint32_t *R = 0;
1671   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1672     U = &SPACE[0];
1673     V = &SPACE[m+n+1];
1674     Q = &SPACE[(m+n+1) + n];
1675     if (Remainder)
1676       R = &SPACE[(m+n+1) + n + (m+n)];
1677   } else {
1678     U = new uint32_t[m + n + 1];
1679     V = new uint32_t[n];
1680     Q = new uint32_t[m+n];
1681     if (Remainder)
1682       R = new uint32_t[n];
1683   }
1684
1685   // Initialize the dividend
1686   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1687   for (unsigned i = 0; i < lhsWords; ++i) {
1688     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1689     U[i * 2] = (uint32_t)(tmp & mask);
1690     U[i * 2 + 1] = (uint32_t)(tmp >> (sizeof(uint32_t)*8));
1691   }
1692   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1693
1694   // Initialize the divisor
1695   memset(V, 0, (n)*sizeof(uint32_t));
1696   for (unsigned i = 0; i < rhsWords; ++i) {
1697     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1698     V[i * 2] = (uint32_t)(tmp & mask);
1699     V[i * 2 + 1] = (uint32_t)(tmp >> (sizeof(uint32_t)*8));
1700   }
1701
1702   // initialize the quotient and remainder
1703   memset(Q, 0, (m+n) * sizeof(uint32_t));
1704   if (Remainder)
1705     memset(R, 0, n * sizeof(uint32_t));
1706
1707   // Now, adjust m and n for the Knuth division. n is the number of words in
1708   // the divisor. m is the number of words by which the dividend exceeds the
1709   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1710   // contain any zero words or the Knuth algorithm fails.
1711   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1712     n--;
1713     m++;
1714   }
1715   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1716     m--;
1717
1718   // If we're left with only a single word for the divisor, Knuth doesn't work
1719   // so we implement the short division algorithm here. This is much simpler
1720   // and faster because we are certain that we can divide a 64-bit quantity
1721   // by a 32-bit quantity at hardware speed and short division is simply a
1722   // series of such operations. This is just like doing short division but we
1723   // are using base 2^32 instead of base 10.
1724   assert(n != 0 && "Divide by zero?");
1725   if (n == 1) {
1726     uint32_t divisor = V[0];
1727     uint32_t remainder = 0;
1728     for (int i = m+n-1; i >= 0; i--) {
1729       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1730       if (partial_dividend == 0) {
1731         Q[i] = 0;
1732         remainder = 0;
1733       } else if (partial_dividend < divisor) {
1734         Q[i] = 0;
1735         remainder = (uint32_t)partial_dividend;
1736       } else if (partial_dividend == divisor) {
1737         Q[i] = 1;
1738         remainder = 0;
1739       } else {
1740         Q[i] = (uint32_t)(partial_dividend / divisor);
1741         remainder = (uint32_t)(partial_dividend - (Q[i] * divisor));
1742       }
1743     }
1744     if (R)
1745       R[0] = remainder;
1746   } else {
1747     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1748     // case n > 1.
1749     KnuthDiv(U, V, Q, R, m, n);
1750   }
1751
1752   // If the caller wants the quotient
1753   if (Quotient) {
1754     // Set up the Quotient value's memory.
1755     if (Quotient->BitWidth != LHS.BitWidth) {
1756       if (Quotient->isSingleWord())
1757         Quotient->VAL = 0;
1758       else
1759         delete [] Quotient->pVal;
1760       Quotient->BitWidth = LHS.BitWidth;
1761       if (!Quotient->isSingleWord())
1762         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1763     } else
1764       Quotient->clear();
1765
1766     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1767     // order words.
1768     if (lhsWords == 1) {
1769       uint64_t tmp =
1770         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1771       if (Quotient->isSingleWord())
1772         Quotient->VAL = tmp;
1773       else
1774         Quotient->pVal[0] = tmp;
1775     } else {
1776       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1777       for (unsigned i = 0; i < lhsWords; ++i)
1778         Quotient->pVal[i] =
1779           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1780     }
1781   }
1782
1783   // If the caller wants the remainder
1784   if (Remainder) {
1785     // Set up the Remainder value's memory.
1786     if (Remainder->BitWidth != RHS.BitWidth) {
1787       if (Remainder->isSingleWord())
1788         Remainder->VAL = 0;
1789       else
1790         delete [] Remainder->pVal;
1791       Remainder->BitWidth = RHS.BitWidth;
1792       if (!Remainder->isSingleWord())
1793         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1794     } else
1795       Remainder->clear();
1796
1797     // The remainder is in R. Reconstitute the remainder into Remainder's low
1798     // order words.
1799     if (rhsWords == 1) {
1800       uint64_t tmp =
1801         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1802       if (Remainder->isSingleWord())
1803         Remainder->VAL = tmp;
1804       else
1805         Remainder->pVal[0] = tmp;
1806     } else {
1807       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1808       for (unsigned i = 0; i < rhsWords; ++i)
1809         Remainder->pVal[i] =
1810           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1811     }
1812   }
1813
1814   // Clean up the memory we allocated.
1815   if (U != &SPACE[0]) {
1816     delete [] U;
1817     delete [] V;
1818     delete [] Q;
1819     delete [] R;
1820   }
1821 }
1822
1823 APInt APInt::udiv(const APInt& RHS) const {
1824   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1825
1826   // First, deal with the easy case
1827   if (isSingleWord()) {
1828     assert(RHS.VAL != 0 && "Divide by zero?");
1829     return APInt(BitWidth, VAL / RHS.VAL);
1830   }
1831
1832   // Get some facts about the LHS and RHS number of bits and words
1833   uint32_t rhsBits = RHS.getActiveBits();
1834   uint32_t rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1835   assert(rhsWords && "Divided by zero???");
1836   uint32_t lhsBits = this->getActiveBits();
1837   uint32_t lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1838
1839   // Deal with some degenerate cases
1840   if (!lhsWords)
1841     // 0 / X ===> 0
1842     return APInt(BitWidth, 0);
1843   else if (lhsWords < rhsWords || this->ult(RHS)) {
1844     // X / Y ===> 0, iff X < Y
1845     return APInt(BitWidth, 0);
1846   } else if (*this == RHS) {
1847     // X / X ===> 1
1848     return APInt(BitWidth, 1);
1849   } else if (lhsWords == 1 && rhsWords == 1) {
1850     // All high words are zero, just use native divide
1851     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1852   }
1853
1854   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1855   APInt Quotient(1,0); // to hold result.
1856   divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1857   return Quotient;
1858 }
1859
1860 APInt APInt::urem(const APInt& RHS) const {
1861   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1862   if (isSingleWord()) {
1863     assert(RHS.VAL != 0 && "Remainder by zero?");
1864     return APInt(BitWidth, VAL % RHS.VAL);
1865   }
1866
1867   // Get some facts about the LHS
1868   uint32_t lhsBits = getActiveBits();
1869   uint32_t lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1870
1871   // Get some facts about the RHS
1872   uint32_t rhsBits = RHS.getActiveBits();
1873   uint32_t rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1874   assert(rhsWords && "Performing remainder operation by zero ???");
1875
1876   // Check the degenerate cases
1877   if (lhsWords == 0) {
1878     // 0 % Y ===> 0
1879     return APInt(BitWidth, 0);
1880   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1881     // X % Y ===> X, iff X < Y
1882     return *this;
1883   } else if (*this == RHS) {
1884     // X % X == 0;
1885     return APInt(BitWidth, 0);
1886   } else if (lhsWords == 1) {
1887     // All high words are zero, just use native remainder
1888     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1889   }
1890
1891   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1892   APInt Remainder(1,0);
1893   divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1894   return Remainder;
1895 }
1896
1897 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1898                     APInt &Quotient, APInt &Remainder) {
1899   // Get some size facts about the dividend and divisor
1900   uint32_t lhsBits  = LHS.getActiveBits();
1901   uint32_t lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1902   uint32_t rhsBits  = RHS.getActiveBits();
1903   uint32_t rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1904
1905   // Check the degenerate cases
1906   if (lhsWords == 0) {
1907     Quotient = 0;                // 0 / Y ===> 0
1908     Remainder = 0;               // 0 % Y ===> 0
1909     return;
1910   }
1911
1912   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1913     Quotient = 0;               // X / Y ===> 0, iff X < Y
1914     Remainder = LHS;            // X % Y ===> X, iff X < Y
1915     return;
1916   }
1917
1918   if (LHS == RHS) {
1919     Quotient  = 1;              // X / X ===> 1
1920     Remainder = 0;              // X % X ===> 0;
1921     return;
1922   }
1923
1924   if (lhsWords == 1 && rhsWords == 1) {
1925     // There is only one word to consider so use the native versions.
1926     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1927     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1928     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1929     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1930     return;
1931   }
1932
1933   // Okay, lets do it the long way
1934   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
1935 }
1936
1937 void APInt::fromString(uint32_t numbits, const char *str, uint32_t slen,
1938                        uint8_t radix) {
1939   // Check our assumptions here
1940   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
1941          "Radix should be 2, 8, 10, or 16!");
1942   assert(str && "String is null?");
1943   bool isNeg = str[0] == '-';
1944   if (isNeg)
1945     str++, slen--;
1946   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1947   assert((slen*3 <= numbits || radix != 8) && "Insufficient bit width");
1948   assert((slen*4 <= numbits || radix != 16) && "Insufficient bit width");
1949   assert(((slen*64)/22 <= numbits || radix != 10) && "Insufficient bit width");
1950
1951   // Allocate memory
1952   if (!isSingleWord())
1953     pVal = getClearedMemory(getNumWords());
1954
1955   // Figure out if we can shift instead of multiply
1956   uint32_t shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1957
1958   // Set up an APInt for the digit to add outside the loop so we don't
1959   // constantly construct/destruct it.
1960   APInt apdigit(getBitWidth(), 0);
1961   APInt apradix(getBitWidth(), radix);
1962
1963   // Enter digit traversal loop
1964   for (unsigned i = 0; i < slen; i++) {
1965     // Get a digit
1966     uint32_t digit = 0;
1967     char cdigit = str[i];
1968     if (radix == 16) {
1969       if (!isxdigit(cdigit))
1970         assert(0 && "Invalid hex digit in string");
1971       if (isdigit(cdigit))
1972         digit = cdigit - '0';
1973       else if (cdigit >= 'a')
1974         digit = cdigit - 'a' + 10;
1975       else if (cdigit >= 'A')
1976         digit = cdigit - 'A' + 10;
1977       else
1978         assert(0 && "huh? we shouldn't get here");
1979     } else if (isdigit(cdigit)) {
1980       digit = cdigit - '0';
1981       assert((radix == 10 ||
1982               (radix == 8 && digit != 8 && digit != 9) ||
1983               (radix == 2 && (digit == 0 || digit == 1))) &&
1984              "Invalid digit in string for given radix");
1985     } else {
1986       assert(0 && "Invalid character in digit string");
1987     }
1988
1989     // Shift or multiply the value by the radix
1990     if (shift)
1991       *this <<= shift;
1992     else
1993       *this *= apradix;
1994
1995     // Add in the digit we just interpreted
1996     if (apdigit.isSingleWord())
1997       apdigit.VAL = digit;
1998     else
1999       apdigit.pVal[0] = digit;
2000     *this += apdigit;
2001   }
2002   // If its negative, put it in two's complement form
2003   if (isNeg) {
2004     (*this)--;
2005     this->flip();
2006   }
2007 }
2008
2009 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2010                      bool Signed) const {
2011   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2012          "Radix should be 2, 8, 10, or 16!");
2013
2014   // First, check for a zero value and just short circuit the logic below.
2015   if (*this == 0) {
2016     Str.push_back('0');
2017     return;
2018   }
2019
2020   static const char Digits[] = "0123456789ABCDEF";
2021
2022   if (isSingleWord()) {
2023     char Buffer[65];
2024     char *BufPtr = Buffer+65;
2025
2026     uint64_t N;
2027     if (Signed) {
2028       int64_t I = getSExtValue();
2029       if (I < 0) {
2030         Str.push_back('-');
2031         I = -I;
2032       }
2033       N = I;
2034     } else {
2035       N = getZExtValue();
2036     }
2037
2038     while (N) {
2039       *--BufPtr = Digits[N % Radix];
2040       N /= Radix;
2041     }
2042     Str.append(BufPtr, Buffer+65);
2043     return;
2044   }
2045
2046   APInt Tmp(*this);
2047
2048   if (Signed && isNegative()) {
2049     // They want to print the signed version and it is a negative value
2050     // Flip the bits and add one to turn it into the equivalent positive
2051     // value and put a '-' in the result.
2052     Tmp.flip();
2053     Tmp++;
2054     Str.push_back('-');
2055   }
2056
2057   // We insert the digits backward, then reverse them to get the right order.
2058   unsigned StartDig = Str.size();
2059
2060   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2061   // because the number of bits per digit (1, 3 and 4 respectively) divides
2062   // equaly.  We just shift until the value is zero.
2063   if (Radix != 10) {
2064     // Just shift tmp right for each digit width until it becomes zero
2065     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2066     unsigned MaskAmt = Radix - 1;
2067
2068     while (Tmp != 0) {
2069       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2070       Str.push_back(Digits[Digit]);
2071       Tmp = Tmp.lshr(ShiftAmt);
2072     }
2073   } else {
2074     APInt divisor(4, 10);
2075     while (Tmp != 0) {
2076       APInt APdigit(1, 0);
2077       APInt tmp2(Tmp.getBitWidth(), 0);
2078       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2079              &APdigit);
2080       uint32_t Digit = (uint32_t)APdigit.getZExtValue();
2081       assert(Digit < Radix && "divide failed");
2082       Str.push_back(Digits[Digit]);
2083       Tmp = tmp2;
2084     }
2085   }
2086
2087   // Reverse the digits before returning.
2088   std::reverse(Str.begin()+StartDig, Str.end());
2089 }
2090
2091 /// toString - This returns the APInt as a std::string.  Note that this is an
2092 /// inefficient method.  It is better to pass in a SmallVector/SmallString
2093 /// to the methods above.
2094 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2095   SmallString<40> S;
2096   toString(S, Radix, Signed);
2097   return S.c_str();
2098 }
2099
2100
2101 void APInt::dump() const {
2102   SmallString<40> S, U;
2103   this->toStringUnsigned(U);
2104   this->toStringSigned(S);
2105   fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str());
2106 }
2107
2108 void APInt::print(std::ostream &OS, bool isSigned) const {
2109   SmallString<40> S;
2110   this->toString(S, 10, isSigned);
2111   OS << S.c_str();
2112 }
2113
2114
2115 // This implements a variety of operations on a representation of
2116 // arbitrary precision, two's-complement, bignum integer values.
2117
2118 /* Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2119    and unrestricting assumption.  */
2120 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2121 COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2122
2123 /* Some handy functions local to this file.  */
2124 namespace {
2125
2126   /* Returns the integer part with the least significant BITS set.
2127      BITS cannot be zero.  */
2128   static inline integerPart
2129   lowBitMask(unsigned int bits)
2130   {
2131     assert (bits != 0 && bits <= integerPartWidth);
2132
2133     return ~(integerPart) 0 >> (integerPartWidth - bits);
2134   }
2135
2136   /* Returns the value of the lower half of PART.  */
2137   static inline integerPart
2138   lowHalf(integerPart part)
2139   {
2140     return part & lowBitMask(integerPartWidth / 2);
2141   }
2142
2143   /* Returns the value of the upper half of PART.  */
2144   static inline integerPart
2145   highHalf(integerPart part)
2146   {
2147     return part >> (integerPartWidth / 2);
2148   }
2149
2150   /* Returns the bit number of the most significant set bit of a part.
2151      If the input number has no bits set -1U is returned.  */
2152   static unsigned int
2153   partMSB(integerPart value)
2154   {
2155     unsigned int n, msb;
2156
2157     if (value == 0)
2158       return -1U;
2159
2160     n = integerPartWidth / 2;
2161
2162     msb = 0;
2163     do {
2164       if (value >> n) {
2165         value >>= n;
2166         msb += n;
2167       }
2168
2169       n >>= 1;
2170     } while (n);
2171
2172     return msb;
2173   }
2174
2175   /* Returns the bit number of the least significant set bit of a
2176      part.  If the input number has no bits set -1U is returned.  */
2177   static unsigned int
2178   partLSB(integerPart value)
2179   {
2180     unsigned int n, lsb;
2181
2182     if (value == 0)
2183       return -1U;
2184
2185     lsb = integerPartWidth - 1;
2186     n = integerPartWidth / 2;
2187
2188     do {
2189       if (value << n) {
2190         value <<= n;
2191         lsb -= n;
2192       }
2193
2194       n >>= 1;
2195     } while (n);
2196
2197     return lsb;
2198   }
2199 }
2200
2201 /* Sets the least significant part of a bignum to the input value, and
2202    zeroes out higher parts.  */
2203 void
2204 APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2205 {
2206   unsigned int i;
2207
2208   assert (parts > 0);
2209
2210   dst[0] = part;
2211   for(i = 1; i < parts; i++)
2212     dst[i] = 0;
2213 }
2214
2215 /* Assign one bignum to another.  */
2216 void
2217 APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2218 {
2219   unsigned int i;
2220
2221   for(i = 0; i < parts; i++)
2222     dst[i] = src[i];
2223 }
2224
2225 /* Returns true if a bignum is zero, false otherwise.  */
2226 bool
2227 APInt::tcIsZero(const integerPart *src, unsigned int parts)
2228 {
2229   unsigned int i;
2230
2231   for(i = 0; i < parts; i++)
2232     if (src[i])
2233       return false;
2234
2235   return true;
2236 }
2237
2238 /* Extract the given bit of a bignum; returns 0 or 1.  */
2239 int
2240 APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2241 {
2242   return(parts[bit / integerPartWidth]
2243          & ((integerPart) 1 << bit % integerPartWidth)) != 0;
2244 }
2245
2246 /* Set the given bit of a bignum.  */
2247 void
2248 APInt::tcSetBit(integerPart *parts, unsigned int bit)
2249 {
2250   parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2251 }
2252
2253 /* Returns the bit number of the least significant set bit of a
2254    number.  If the input number has no bits set -1U is returned.  */
2255 unsigned int
2256 APInt::tcLSB(const integerPart *parts, unsigned int n)
2257 {
2258   unsigned int i, lsb;
2259
2260   for(i = 0; i < n; i++) {
2261       if (parts[i] != 0) {
2262           lsb = partLSB(parts[i]);
2263
2264           return lsb + i * integerPartWidth;
2265       }
2266   }
2267
2268   return -1U;
2269 }
2270
2271 /* Returns the bit number of the most significant set bit of a number.
2272    If the input number has no bits set -1U is returned.  */
2273 unsigned int
2274 APInt::tcMSB(const integerPart *parts, unsigned int n)
2275 {
2276   unsigned int msb;
2277
2278   do {
2279       --n;
2280
2281       if (parts[n] != 0) {
2282           msb = partMSB(parts[n]);
2283
2284           return msb + n * integerPartWidth;
2285       }
2286   } while (n);
2287
2288   return -1U;
2289 }
2290
2291 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2292    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2293    the least significant bit of DST.  All high bits above srcBITS in
2294    DST are zero-filled.  */
2295 void
2296 APInt::tcExtract(integerPart *dst, unsigned int dstCount, const integerPart *src,
2297                  unsigned int srcBits, unsigned int srcLSB)
2298 {
2299   unsigned int firstSrcPart, dstParts, shift, n;
2300
2301   dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2302   assert (dstParts <= dstCount);
2303
2304   firstSrcPart = srcLSB / integerPartWidth;
2305   tcAssign (dst, src + firstSrcPart, dstParts);
2306
2307   shift = srcLSB % integerPartWidth;
2308   tcShiftRight (dst, dstParts, shift);
2309
2310   /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2311      in DST.  If this is less that srcBits, append the rest, else
2312      clear the high bits.  */
2313   n = dstParts * integerPartWidth - shift;
2314   if (n < srcBits) {
2315     integerPart mask = lowBitMask (srcBits - n);
2316     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2317                           << n % integerPartWidth);
2318   } else if (n > srcBits) {
2319     if (srcBits % integerPartWidth)
2320       dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2321   }
2322
2323   /* Clear high parts.  */
2324   while (dstParts < dstCount)
2325     dst[dstParts++] = 0;
2326 }
2327
2328 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2329 integerPart
2330 APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2331              integerPart c, unsigned int parts)
2332 {
2333   unsigned int i;
2334
2335   assert(c <= 1);
2336
2337   for(i = 0; i < parts; i++) {
2338     integerPart l;
2339
2340     l = dst[i];
2341     if (c) {
2342       dst[i] += rhs[i] + 1;
2343       c = (dst[i] <= l);
2344     } else {
2345       dst[i] += rhs[i];
2346       c = (dst[i] < l);
2347     }
2348   }
2349
2350   return c;
2351 }
2352
2353 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2354 integerPart
2355 APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2356                   integerPart c, unsigned int parts)
2357 {
2358   unsigned int i;
2359
2360   assert(c <= 1);
2361
2362   for(i = 0; i < parts; i++) {
2363     integerPart l;
2364
2365     l = dst[i];
2366     if (c) {
2367       dst[i] -= rhs[i] + 1;
2368       c = (dst[i] >= l);
2369     } else {
2370       dst[i] -= rhs[i];
2371       c = (dst[i] > l);
2372     }
2373   }
2374
2375   return c;
2376 }
2377
2378 /* Negate a bignum in-place.  */
2379 void
2380 APInt::tcNegate(integerPart *dst, unsigned int parts)
2381 {
2382   tcComplement(dst, parts);
2383   tcIncrement(dst, parts);
2384 }
2385
2386 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2387     DST  = SRC * MULTIPLIER + CARRY   if add is false
2388
2389     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2390     they must start at the same point, i.e. DST == SRC.
2391
2392     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2393     returned.  Otherwise DST is filled with the least significant
2394     DSTPARTS parts of the result, and if all of the omitted higher
2395     parts were zero return zero, otherwise overflow occurred and
2396     return one.  */
2397 int
2398 APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2399                       integerPart multiplier, integerPart carry,
2400                       unsigned int srcParts, unsigned int dstParts,
2401                       bool add)
2402 {
2403   unsigned int i, n;
2404
2405   /* Otherwise our writes of DST kill our later reads of SRC.  */
2406   assert(dst <= src || dst >= src + srcParts);
2407   assert(dstParts <= srcParts + 1);
2408
2409   /* N loops; minimum of dstParts and srcParts.  */
2410   n = dstParts < srcParts ? dstParts: srcParts;
2411
2412   for(i = 0; i < n; i++) {
2413     integerPart low, mid, high, srcPart;
2414
2415       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2416
2417          This cannot overflow, because
2418
2419          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2420
2421          which is less than n^2.  */
2422
2423     srcPart = src[i];
2424
2425     if (multiplier == 0 || srcPart == 0)        {
2426       low = carry;
2427       high = 0;
2428     } else {
2429       low = lowHalf(srcPart) * lowHalf(multiplier);
2430       high = highHalf(srcPart) * highHalf(multiplier);
2431
2432       mid = lowHalf(srcPart) * highHalf(multiplier);
2433       high += highHalf(mid);
2434       mid <<= integerPartWidth / 2;
2435       if (low + mid < low)
2436         high++;
2437       low += mid;
2438
2439       mid = highHalf(srcPart) * lowHalf(multiplier);
2440       high += highHalf(mid);
2441       mid <<= integerPartWidth / 2;
2442       if (low + mid < low)
2443         high++;
2444       low += mid;
2445
2446       /* Now add carry.  */
2447       if (low + carry < low)
2448         high++;
2449       low += carry;
2450     }
2451
2452     if (add) {
2453       /* And now DST[i], and store the new low part there.  */
2454       if (low + dst[i] < low)
2455         high++;
2456       dst[i] += low;
2457     } else
2458       dst[i] = low;
2459
2460     carry = high;
2461   }
2462
2463   if (i < dstParts) {
2464     /* Full multiplication, there is no overflow.  */
2465     assert(i + 1 == dstParts);
2466     dst[i] = carry;
2467     return 0;
2468   } else {
2469     /* We overflowed if there is carry.  */
2470     if (carry)
2471       return 1;
2472
2473     /* We would overflow if any significant unwritten parts would be
2474        non-zero.  This is true if any remaining src parts are non-zero
2475        and the multiplier is non-zero.  */
2476     if (multiplier)
2477       for(; i < srcParts; i++)
2478         if (src[i])
2479           return 1;
2480
2481     /* We fitted in the narrow destination.  */
2482     return 0;
2483   }
2484 }
2485
2486 /* DST = LHS * RHS, where DST has the same width as the operands and
2487    is filled with the least significant parts of the result.  Returns
2488    one if overflow occurred, otherwise zero.  DST must be disjoint
2489    from both operands.  */
2490 int
2491 APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2492                   const integerPart *rhs, unsigned int parts)
2493 {
2494   unsigned int i;
2495   int overflow;
2496
2497   assert(dst != lhs && dst != rhs);
2498
2499   overflow = 0;
2500   tcSet(dst, 0, parts);
2501
2502   for(i = 0; i < parts; i++)
2503     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2504                                parts - i, true);
2505
2506   return overflow;
2507 }
2508
2509 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2510    operands.  No overflow occurs.  DST must be disjoint from both
2511    operands.  Returns the number of parts required to hold the
2512    result.  */
2513 unsigned int
2514 APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2515                       const integerPart *rhs, unsigned int lhsParts,
2516                       unsigned int rhsParts)
2517 {
2518   /* Put the narrower number on the LHS for less loops below.  */
2519   if (lhsParts > rhsParts) {
2520     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2521   } else {
2522     unsigned int n;
2523
2524     assert(dst != lhs && dst != rhs);
2525
2526     tcSet(dst, 0, rhsParts);
2527
2528     for(n = 0; n < lhsParts; n++)
2529       tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2530
2531     n = lhsParts + rhsParts;
2532
2533     return n - (dst[n - 1] == 0);
2534   }
2535 }
2536
2537 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2538    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2539    set REMAINDER to the remainder, return zero.  i.e.
2540
2541    OLD_LHS = RHS * LHS + REMAINDER
2542
2543    SCRATCH is a bignum of the same size as the operands and result for
2544    use by the routine; its contents need not be initialized and are
2545    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2546 */
2547 int
2548 APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2549                 integerPart *remainder, integerPart *srhs,
2550                 unsigned int parts)
2551 {
2552   unsigned int n, shiftCount;
2553   integerPart mask;
2554
2555   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2556
2557   shiftCount = tcMSB(rhs, parts) + 1;
2558   if (shiftCount == 0)
2559     return true;
2560
2561   shiftCount = parts * integerPartWidth - shiftCount;
2562   n = shiftCount / integerPartWidth;
2563   mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2564
2565   tcAssign(srhs, rhs, parts);
2566   tcShiftLeft(srhs, parts, shiftCount);
2567   tcAssign(remainder, lhs, parts);
2568   tcSet(lhs, 0, parts);
2569
2570   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2571      the total.  */
2572   for(;;) {
2573       int compare;
2574
2575       compare = tcCompare(remainder, srhs, parts);
2576       if (compare >= 0) {
2577         tcSubtract(remainder, srhs, 0, parts);
2578         lhs[n] |= mask;
2579       }
2580
2581       if (shiftCount == 0)
2582         break;
2583       shiftCount--;
2584       tcShiftRight(srhs, parts, 1);
2585       if ((mask >>= 1) == 0)
2586         mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2587   }
2588
2589   return false;
2590 }
2591
2592 /* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2593    There are no restrictions on COUNT.  */
2594 void
2595 APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2596 {
2597   if (count) {
2598     unsigned int jump, shift;
2599
2600     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2601     jump = count / integerPartWidth;
2602     shift = count % integerPartWidth;
2603
2604     while (parts > jump) {
2605       integerPart part;
2606
2607       parts--;
2608
2609       /* dst[i] comes from the two parts src[i - jump] and, if we have
2610          an intra-part shift, src[i - jump - 1].  */
2611       part = dst[parts - jump];
2612       if (shift) {
2613         part <<= shift;
2614         if (parts >= jump + 1)
2615           part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2616       }
2617
2618       dst[parts] = part;
2619     }
2620
2621     while (parts > 0)
2622       dst[--parts] = 0;
2623   }
2624 }
2625
2626 /* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2627    zero.  There are no restrictions on COUNT.  */
2628 void
2629 APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2630 {
2631   if (count) {
2632     unsigned int i, jump, shift;
2633
2634     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2635     jump = count / integerPartWidth;
2636     shift = count % integerPartWidth;
2637
2638     /* Perform the shift.  This leaves the most significant COUNT bits
2639        of the result at zero.  */
2640     for(i = 0; i < parts; i++) {
2641       integerPart part;
2642
2643       if (i + jump >= parts) {
2644         part = 0;
2645       } else {
2646         part = dst[i + jump];
2647         if (shift) {
2648           part >>= shift;
2649           if (i + jump + 1 < parts)
2650             part |= dst[i + jump + 1] << (integerPartWidth - shift);
2651         }
2652       }
2653
2654       dst[i] = part;
2655     }
2656   }
2657 }
2658
2659 /* Bitwise and of two bignums.  */
2660 void
2661 APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2662 {
2663   unsigned int i;
2664
2665   for(i = 0; i < parts; i++)
2666     dst[i] &= rhs[i];
2667 }
2668
2669 /* Bitwise inclusive or of two bignums.  */
2670 void
2671 APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2672 {
2673   unsigned int i;
2674
2675   for(i = 0; i < parts; i++)
2676     dst[i] |= rhs[i];
2677 }
2678
2679 /* Bitwise exclusive or of two bignums.  */
2680 void
2681 APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2682 {
2683   unsigned int i;
2684
2685   for(i = 0; i < parts; i++)
2686     dst[i] ^= rhs[i];
2687 }
2688
2689 /* Complement a bignum in-place.  */
2690 void
2691 APInt::tcComplement(integerPart *dst, unsigned int parts)
2692 {
2693   unsigned int i;
2694
2695   for(i = 0; i < parts; i++)
2696     dst[i] = ~dst[i];
2697 }
2698
2699 /* Comparison (unsigned) of two bignums.  */
2700 int
2701 APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2702                  unsigned int parts)
2703 {
2704   while (parts) {
2705       parts--;
2706       if (lhs[parts] == rhs[parts])
2707         continue;
2708
2709       if (lhs[parts] > rhs[parts])
2710         return 1;
2711       else
2712         return -1;
2713     }
2714
2715   return 0;
2716 }
2717
2718 /* Increment a bignum in-place, return the carry flag.  */
2719 integerPart
2720 APInt::tcIncrement(integerPart *dst, unsigned int parts)
2721 {
2722   unsigned int i;
2723
2724   for(i = 0; i < parts; i++)
2725     if (++dst[i] != 0)
2726       break;
2727
2728   return i == parts;
2729 }
2730
2731 /* Set the least significant BITS bits of a bignum, clear the
2732    rest.  */
2733 void
2734 APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2735                                  unsigned int bits)
2736 {
2737   unsigned int i;
2738
2739   i = 0;
2740   while (bits > integerPartWidth) {
2741     dst[i++] = ~(integerPart) 0;
2742     bits -= integerPartWidth;
2743   }
2744
2745   if (bits)
2746     dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2747
2748   while (i < parts)
2749     dst[i++] = 0;
2750 }