folly/Conv.cpp

   1 /*
   2  * Copyright 2017 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16 #include <folly/Conv.h>
  17 #include <array>
  18
  19 namespace folly {
  20 namespace detail {
  21
  22 namespace {
  23
  24 /**
  25  * Finds the first non-digit in a string. The number of digits
  26  * searched depends on the precision of the Tgt integral. Assumes the
  27  * string starts with NO whitespace and NO sign.
  28  *
  29  * The semantics of the routine is:
  30  *   for (;; ++b) {
  31  *     if (b >= e || !isdigit(*b)) return b;
  32  *   }
  33  *
  34  *  Complete unrolling marks bottom-line (i.e. entire conversion)
  35  *  improvements of 20%.
  36  */
  37 inline const char* findFirstNonDigit(const char* b, const char* e) {
  38   for (; b < e; ++b) {
  39     auto const c = static_cast<unsigned>(*b) - '0';
  40     if (c >= 10) {
  41       break;
  42     }
  43   }
  44   return b;
  45 }
  46
  47 // Maximum value of number when represented as a string
  48 template <class T>
  49 struct MaxString {
  50   static const char* const value;
  51 };
  52
  53 template <> const char *const MaxString<uint8_t>::value = "255";
  54 template <> const char *const MaxString<uint16_t>::value = "65535";
  55 template <> const char *const MaxString<uint32_t>::value = "4294967295";
  56 #if __SIZEOF_LONG__ == 4
  57 template <> const char *const MaxString<unsigned long>::value =
  58   "4294967295";
  59 #else
  60 template <> const char *const MaxString<unsigned long>::value =
  61   "18446744073709551615";
  62 #endif
  63 static_assert(sizeof(unsigned long) >= 4,
  64               "Wrong value for MaxString<unsigned long>::value,"
  65               " please update.");
  66 template <> const char *const MaxString<unsigned long long>::value =
  67   "18446744073709551615";
  68 static_assert(sizeof(unsigned long long) >= 8,
  69               "Wrong value for MaxString<unsigned long long>::value"
  70               ", please update.");
  71
  72 #if FOLLY_HAVE_INT128_T
  73 template <> const char *const MaxString<__uint128_t>::value =
  74   "340282366920938463463374607431768211455";
  75 #endif
  76
/*
 * Lookup tables that convert a decimal character value to an integral
 * binary value, shifted by a decimal "shift" multiplier.
 * For all character values in the range '0'..'9', the table at those
 * index locations returns the actual decimal value shifted by the multiplier.
 * For all other values, the lookup table returns an invalid OOR value.
 */
  84 // Out-of-range flag value, larger than the largest value that can fit in
  85 // four decimal bytes (9999), but four of these added up together should
  86 // still not overflow uint16_t.
  87 constexpr int32_t OOR = 10000;
  88
  89 FOLLY_ALIGNED(16) constexpr uint16_t shift1[] = {
  90   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 0-9
  91   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  10
  92   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  20
  93   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  30
  94   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0,         //  40
  95   1, 2, 3, 4, 5, 6, 7, 8, 9, OOR, OOR,
  96   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  60
  97   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  70
  98   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  80
  99   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  90
 100   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 100
 101   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 110
 102   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 120
 103   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 130
 104   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 140
 105   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 150
 106   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 160
 107   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 170
 108   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 180
 109   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 190
 110   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 200
 111   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 210
 112   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 220
 113   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 230
 114   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 240
 115   OOR, OOR, OOR, OOR, OOR, OOR                       // 250
 116 };
 117
 118 FOLLY_ALIGNED(16) constexpr uint16_t shift10[] = {
 119   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 0-9
 120   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  10
 121   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  20
 122   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  30
 123   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0,         //  40
 124   10, 20, 30, 40, 50, 60, 70, 80, 90, OOR, OOR,
 125   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  60
 126   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  70
 127   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  80
 128   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  90
 129   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 100
 130   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 110
 131   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 120
 132   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 130
 133   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 140
 134   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 150
 135   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 160
 136   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 170
 137   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 180
 138   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 190
 139   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 200
 140   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 210
 141   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 220
 142   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 230
 143   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 240
 144   OOR, OOR, OOR, OOR, OOR, OOR                       // 250
 145 };
 146
 147 FOLLY_ALIGNED(16) constexpr uint16_t shift100[] = {
 148   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 0-9
 149   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  10
 150   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  20
 151   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  30
 152   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0,         //  40
 153   100, 200, 300, 400, 500, 600, 700, 800, 900, OOR, OOR,
 154   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  60
 155   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  70
 156   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  80
 157   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  90
 158   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 100
 159   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 110
 160   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 120
 161   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 130
 162   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 140
 163   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 150
 164   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 160
 165   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 170
 166   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 180
 167   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 190
 168   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 200
 169   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 210
 170   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 220
 171   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 230
 172   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 240
 173   OOR, OOR, OOR, OOR, OOR, OOR                       // 250
 174 };
 175
 176 FOLLY_ALIGNED(16) constexpr uint16_t shift1000[] = {
 177   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 0-9
 178   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  10
 179   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  20
 180   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  30
 181   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0,         //  40
 182   1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, OOR, OOR,
 183   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  60
 184   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  70
 185   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  80
 186   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  //  90
 187   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 100
 188   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 110
 189   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 120
 190   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 130
 191   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 140
 192   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 150
 193   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 160
 194   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 170
 195   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 180
 196   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 190
 197   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 200
 198   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 210
 199   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 220
 200   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 230
 201   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 240
 202   OOR, OOR, OOR, OOR, OOR, OOR                       // 250
 203 };
 204
 205 struct ErrorString {
 206   const char* string;
 207   bool quote;
 208 };
 209
 210 // Keep this in sync with ConversionCode in Conv.h
 211 constexpr const std::array<
 212     ErrorString,
 213     static_cast<std::size_t>(ConversionCode::NUM_ERROR_CODES)>
 214     kErrorStrings{{
 215         {"Success", true},
 216         {"Empty input string", true},
 217         {"No digits found in input string", true},
 218         {"Integer overflow when parsing bool (must be 0 or 1)", true},
 219         {"Invalid value for bool", true},
 220         {"Non-digit character found", true},
 221         {"Invalid leading character", true},
 222         {"Overflow during conversion", true},
 223         {"Negative overflow during conversion", true},
 224         {"Unable to convert string to floating point value", true},
 225         {"Non-whitespace character found after end of conversion", true},
 226         {"Overflow during arithmetic conversion", false},
 227         {"Negative overflow during arithmetic conversion", false},
 228         {"Loss of precision during arithmetic conversion", false},
 229     }};
 230
 231 // Check if ASCII is really ASCII
 232 using IsAscii = std::
 233     integral_constant<bool, 'A' == 65 && 'Z' == 90 && 'a' == 97 && 'z' == 122>;
 234
 235 // The code in this file that uses tolower() really only cares about
 236 // 7-bit ASCII characters, so we can take a nice shortcut here.
 237 inline char tolower_ascii(char in) {
 238   return IsAscii::value ? in | 0x20 : char(std::tolower(in));
 239 }
 240
 241 inline bool bool_str_cmp(const char** b, size_t len, const char* value) {
 242   // Can't use strncasecmp, since we want to ensure that the full value matches
 243   const char* p = *b;
 244   const char* e = *b + len;
 245   const char* v = value;
 246   while (*v != '\0') {
 247     if (p == e || tolower_ascii(*p) != *v) { // value is already lowercase
 248       return false;
 249     }
 250     ++p;
 251     ++v;
 252   }
 253
 254   *b = p;
 255   return true;
 256 }
 257
 258 } // namespace
 259
 260 Expected<bool, ConversionCode> str_to_bool(StringPiece* src) noexcept {
 261   auto b = src->begin(), e = src->end();
 262   for (;; ++b) {
 263     if (b >= e) {
 264       return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING);
 265     }
 266     if (!std::isspace(*b)) {
 267       break;
 268     }
 269   }
 270
 271   bool result;
 272   size_t len = size_t(e - b);
 273   switch (*b) {
 274     case '0':
 275     case '1': {
 276       result = false;
 277       for (; b < e && isdigit(*b); ++b) {
 278         if (result || (*b != '0' && *b != '1')) {
 279           return makeUnexpected(ConversionCode::BOOL_OVERFLOW);
 280         }
 281         result = (*b == '1');
 282       }
 283       break;
 284     }
 285     case 'y':
 286     case 'Y':
 287       result = true;
 288       if (!bool_str_cmp(&b, len, "yes")) {
 289         ++b;  // accept the single 'y' character
 290       }
 291       break;
 292     case 'n':
 293     case 'N':
 294       result = false;
 295       if (!bool_str_cmp(&b, len, "no")) {
 296         ++b;
 297       }
 298       break;
 299     case 't':
 300     case 'T':
 301       result = true;
 302       if (!bool_str_cmp(&b, len, "true")) {
 303         ++b;
 304       }
 305       break;
 306     case 'f':
 307     case 'F':
 308       result = false;
 309       if (!bool_str_cmp(&b, len, "false")) {
 310         ++b;
 311       }
 312       break;
 313     case 'o':
 314     case 'O':
 315       if (bool_str_cmp(&b, len, "on")) {
 316         result = true;
 317       } else if (bool_str_cmp(&b, len, "off")) {
 318         result = false;
 319       } else {
 320         return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE);
 321       }
 322       break;
 323     default:
 324       return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE);
 325   }
 326
 327   src->assign(b, e);
 328
 329   return result;
 330 }
 331
 332 /**
 333  * StringPiece to double, with progress information. Alters the
 334  * StringPiece parameter to munch the already-parsed characters.
 335  */
 336 template <class Tgt>
 337 Expected<Tgt, ConversionCode> str_to_floating(StringPiece* src) noexcept {
 338   using namespace double_conversion;
 339   static StringToDoubleConverter
 340     conv(StringToDoubleConverter::ALLOW_TRAILING_JUNK
 341          | StringToDoubleConverter::ALLOW_LEADING_SPACES,
 342          0.0,
 343          // return this for junk input string
 344          std::numeric_limits<double>::quiet_NaN(),
 345          nullptr, nullptr);
 346
 347   if (src->empty()) {
 348     return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING);
 349   }
 350
 351   int length;
 352   auto result = conv.StringToDouble(src->data(),
 353                                     static_cast<int>(src->size()),
 354                                     &length); // processed char count
 355
 356   if (!std::isnan(result)) {
 357     // If we get here with length = 0, the input string is empty.
 358     // If we get here with result = 0.0, it's either because the string
 359     // contained only whitespace, or because we had an actual zero value
 360     // (with potential trailing junk). If it was only whitespace, we
 361     // want to raise an error; length will point past the last character
 362     // that was processed, so we need to check if that character was
 363     // whitespace or not.
 364     if (length == 0 ||
 365         (result == 0.0 && std::isspace((*src)[size_t(length) - 1]))) {
 366       return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING);
 367     }
 368     src->advance(size_t(length));
 369     return Tgt(result);
 370   }
 371
 372   auto* e = src->end();
 373   auto* b =
 374       std::find_if_not(src->begin(), e, [](char c) { return std::isspace(c); });
 375
 376   // There must be non-whitespace, otherwise we would have caught this above
 377   assert(b < e);
 378   size_t size = size_t(e - b);
 379
 380   bool negative = false;
 381   if (*b == '-') {
 382     negative = true;
 383     ++b;
 384     --size;
 385   }
 386
 387   result = 0.0;
 388
 389   switch (tolower_ascii(*b)) {
 390     case 'i':
 391       if (size >= 3 && tolower_ascii(b[1]) == 'n' &&
 392           tolower_ascii(b[2]) == 'f') {
 393         if (size >= 8 && tolower_ascii(b[3]) == 'i' &&
 394             tolower_ascii(b[4]) == 'n' && tolower_ascii(b[5]) == 'i' &&
 395             tolower_ascii(b[6]) == 't' && tolower_ascii(b[7]) == 'y') {
 396           b += 8;
 397         } else {
 398           b += 3;
 399         }
 400         result = std::numeric_limits<Tgt>::infinity();
 401       }
 402       break;
 403
 404     case 'n':
 405       if (size >= 3 && tolower_ascii(b[1]) == 'a' &&
 406           tolower_ascii(b[2]) == 'n') {
 407         b += 3;
 408         result = std::numeric_limits<Tgt>::quiet_NaN();
 409       }
 410       break;
 411
 412     default:
 413       break;
 414   }
 415
 416   if (result == 0.0) {
 417     // All bets are off
 418     return makeUnexpected(ConversionCode::STRING_TO_FLOAT_ERROR);
 419   }
 420
 421   if (negative) {
 422     result = -result;
 423   }
 424
 425   src->assign(b, e);
 426
 427   return Tgt(result);
 428 }
 429
 430 template Expected<float, ConversionCode> str_to_floating<float>(
 431     StringPiece* src) noexcept;
 432 template Expected<double, ConversionCode> str_to_floating<double>(
 433     StringPiece* src) noexcept;
 434
 435 /**
 436  * This class takes care of additional processing needed for signed values,
 437  * like leading sign character and overflow checks.
 438  */
 439 template <typename T, bool IsSigned = std::is_signed<T>::value>
 440 class SignedValueHandler;
 441
 442 template <typename T>
 443 class SignedValueHandler<T, true> {
 444  public:
 445   ConversionCode init(const char*& b) {
 446     negative_ = false;
 447     if (!std::isdigit(*b)) {
 448       if (*b == '-') {
 449         negative_ = true;
 450       } else if (UNLIKELY(*b != '+')) {
 451         return ConversionCode::INVALID_LEADING_CHAR;
 452       }
 453       ++b;
 454     }
 455     return ConversionCode::SUCCESS;
 456   }
 457
 458   ConversionCode overflow() {
 459     return negative_ ? ConversionCode::NEGATIVE_OVERFLOW
 460                      : ConversionCode::POSITIVE_OVERFLOW;
 461   }
 462
 463   template <typename U>
 464   Expected<T, ConversionCode> finalize(U value) {
 465     T rv;
 466     if (negative_) {
 467       rv = T(-value);
 468       if (UNLIKELY(rv > 0)) {
 469         return makeUnexpected(ConversionCode::NEGATIVE_OVERFLOW);
 470       }
 471     } else {
 472       rv = T(value);
 473       if (UNLIKELY(rv < 0)) {
 474         return makeUnexpected(ConversionCode::POSITIVE_OVERFLOW);
 475       }
 476     }
 477     return rv;
 478   }
 479
 480  private:
 481   bool negative_;
 482 };
 483
 484 // For unsigned types, we don't need any extra processing
 485 template <typename T>
 486 class SignedValueHandler<T, false> {
 487  public:
 488   ConversionCode init(const char*&) {
 489     return ConversionCode::SUCCESS;
 490   }
 491
 492   ConversionCode overflow() {
 493     return ConversionCode::POSITIVE_OVERFLOW;
 494   }
 495
 496   Expected<T, ConversionCode> finalize(T value) {
 497     return value;
 498   }
 499 };
 500
 501 /**
 502  * String represented as a pair of pointers to char to signed/unsigned
 503  * integrals. Assumes NO whitespace before or after, and also that the
 504  * string is composed entirely of digits (and an optional sign only for
 505  * signed types). String may be empty, in which case digits_to returns
 506  * an appropriate error.
 507  */
 508 template <class Tgt>
 509 inline Expected<Tgt, ConversionCode> digits_to(
 510     const char* b,
 511     const char* const e) noexcept {
 512   using UT = typename std::make_unsigned<Tgt>::type;
 513   assert(b <= e);
 514
 515   SignedValueHandler<Tgt> sgn;
 516
 517   auto err = sgn.init(b);
 518   if (UNLIKELY(err != ConversionCode::SUCCESS)) {
 519     return makeUnexpected(err);
 520   }
 521
 522   size_t size = size_t(e - b);
 523
 524   /* Although the string is entirely made of digits, we still need to
 525    * check for overflow.
 526    */
 527   if (size > std::numeric_limits<UT>::digits10) {
 528     // Leading zeros?
 529     if (b < e && *b == '0') {
 530       for (++b;; ++b) {
 531         if (b == e) {
 532           return Tgt(0); // just zeros, e.g. "0000"
 533         }
 534         if (*b != '0') {
 535           size = size_t(e - b);
 536           break;
 537         }
 538       }
 539     }
 540     if (size > std::numeric_limits<UT>::digits10 &&
 541         (size != std::numeric_limits<UT>::digits10 + 1 ||
 542          strncmp(b, MaxString<UT>::value, size) > 0)) {
 543       return makeUnexpected(sgn.overflow());
 544     }
 545   }
 546
 547   // Here we know that the number won't overflow when
 548   // converted. Proceed without checks.
 549
 550   UT result = 0;
 551
 552   for (; e - b >= 4; b += 4) {
 553     result *= static_cast<UT>(10000);
 554     const int32_t r0 = shift1000[static_cast<size_t>(b[0])];
 555     const int32_t r1 = shift100[static_cast<size_t>(b[1])];
 556     const int32_t r2 = shift10[static_cast<size_t>(b[2])];
 557     const int32_t r3 = shift1[static_cast<size_t>(b[3])];
 558     const auto sum = r0 + r1 + r2 + r3;
 559     if (sum >= OOR) {
 560       goto outOfRange;
 561     }
 562     result += UT(sum);
 563   }
 564
 565   switch (e - b) {
 566   case 3: {
 567     const int32_t r0 = shift100[static_cast<size_t>(b[0])];
 568     const int32_t r1 = shift10[static_cast<size_t>(b[1])];
 569     const int32_t r2 = shift1[static_cast<size_t>(b[2])];
 570     const auto sum = r0 + r1 + r2;
 571     if (sum >= OOR) {
 572       goto outOfRange;
 573     }
 574     result = UT(1000 * result + sum);
 575     break;
 576   }
 577   case 2: {
 578     const int32_t r0 = shift10[static_cast<size_t>(b[0])];
 579     const int32_t r1 = shift1[static_cast<size_t>(b[1])];
 580     const auto sum = r0 + r1;
 581     if (sum >= OOR) {
 582       goto outOfRange;
 583     }
 584     result = UT(100 * result + sum);
 585     break;
 586   }
 587   case 1: {
 588     const int32_t sum = shift1[static_cast<size_t>(b[0])];
 589     if (sum >= OOR) {
 590       goto outOfRange;
 591     }
 592     result = UT(10 * result + sum);
 593     break;
 594   }
 595   default:
 596     assert(b == e);
 597     if (size == 0) {
 598       return makeUnexpected(ConversionCode::NO_DIGITS);
 599     }
 600     break;
 601   }
 602
 603   return sgn.finalize(result);
 604
 605 outOfRange:
 606   return makeUnexpected(ConversionCode::NON_DIGIT_CHAR);
 607 }
 608
 609 template Expected<char, ConversionCode> digits_to<char>(
 610     const char*,
 611     const char*) noexcept;
 612 template Expected<signed char, ConversionCode> digits_to<signed char>(
 613     const char*,
 614     const char*) noexcept;
 615 template Expected<unsigned char, ConversionCode> digits_to<unsigned char>(
 616     const char*,
 617     const char*) noexcept;
 618
 619 template Expected<short, ConversionCode> digits_to<short>(
 620     const char*,
 621     const char*) noexcept;
 622 template Expected<unsigned short, ConversionCode> digits_to<unsigned short>(
 623     const char*,
 624     const char*) noexcept;
 625
 626 template Expected<int, ConversionCode> digits_to<int>(
 627     const char*,
 628     const char*) noexcept;
 629 template Expected<unsigned int, ConversionCode> digits_to<unsigned int>(
 630     const char*,
 631     const char*) noexcept;
 632
 633 template Expected<long, ConversionCode> digits_to<long>(
 634     const char*,
 635     const char*) noexcept;
 636 template Expected<unsigned long, ConversionCode> digits_to<unsigned long>(
 637     const char*,
 638     const char*) noexcept;
 639
 640 template Expected<long long, ConversionCode> digits_to<long long>(
 641     const char*,
 642     const char*) noexcept;
 643 template Expected<unsigned long long, ConversionCode>
 644 digits_to<unsigned long long>(const char*, const char*) noexcept;
 645
 646 #if FOLLY_HAVE_INT128_T
 647 template Expected<__int128, ConversionCode> digits_to<__int128>(
 648     const char*,
 649     const char*) noexcept;
 650 template Expected<unsigned __int128, ConversionCode>
 651 digits_to<unsigned __int128>(const char*, const char*) noexcept;
 652 #endif
 653
 654 /**
 655  * StringPiece to integrals, with progress information. Alters the
 656  * StringPiece parameter to munch the already-parsed characters.
 657  */
 658 template <class Tgt>
 659 Expected<Tgt, ConversionCode> str_to_integral(StringPiece* src) noexcept {
 660   using UT = typename std::make_unsigned<Tgt>::type;
 661
 662   auto b = src->data(), past = src->data() + src->size();
 663
 664   for (;; ++b) {
 665     if (UNLIKELY(b >= past)) {
 666       return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING);
 667     }
 668     if (!std::isspace(*b)) {
 669       break;
 670     }
 671   }
 672
 673   SignedValueHandler<Tgt> sgn;
 674   auto err = sgn.init(b);
 675
 676   if (UNLIKELY(err != ConversionCode::SUCCESS)) {
 677     return makeUnexpected(err);
 678   }
 679   if (std::is_signed<Tgt>::value && UNLIKELY(b >= past)) {
 680     return makeUnexpected(ConversionCode::NO_DIGITS);
 681   }
 682   if (UNLIKELY(!isdigit(*b))) {
 683     return makeUnexpected(ConversionCode::NON_DIGIT_CHAR);
 684   }
 685
 686   auto m = findFirstNonDigit(b + 1, past);
 687
 688   auto tmp = digits_to<UT>(b, m);
 689
 690   if (UNLIKELY(!tmp.hasValue())) {
 691     return makeUnexpected(
 692         tmp.error() == ConversionCode::POSITIVE_OVERFLOW ? sgn.overflow()
 693                                                          : tmp.error());
 694   }
 695
 696   auto res = sgn.finalize(tmp.value());
 697
 698   if (res.hasValue()) {
 699     src->advance(size_t(m - src->data()));
 700   }
 701
 702   return res;
 703 }
 704
 705 template Expected<char, ConversionCode> str_to_integral<char>(
 706     StringPiece* src) noexcept;
 707 template Expected<signed char, ConversionCode> str_to_integral<signed char>(
 708     StringPiece* src) noexcept;
 709 template Expected<unsigned char, ConversionCode> str_to_integral<unsigned char>(
 710     StringPiece* src) noexcept;
 711
 712 template Expected<short, ConversionCode> str_to_integral<short>(
 713     StringPiece* src) noexcept;
 714 template Expected<unsigned short, ConversionCode>
 715 str_to_integral<unsigned short>(StringPiece* src) noexcept;
 716
 717 template Expected<int, ConversionCode> str_to_integral<int>(
 718     StringPiece* src) noexcept;
 719 template Expected<unsigned int, ConversionCode> str_to_integral<unsigned int>(
 720     StringPiece* src) noexcept;
 721
 722 template Expected<long, ConversionCode> str_to_integral<long>(
 723     StringPiece* src) noexcept;
 724 template Expected<unsigned long, ConversionCode> str_to_integral<unsigned long>(
 725     StringPiece* src) noexcept;
 726
 727 template Expected<long long, ConversionCode> str_to_integral<long long>(
 728     StringPiece* src) noexcept;
 729 template Expected<unsigned long long, ConversionCode>
 730 str_to_integral<unsigned long long>(StringPiece* src) noexcept;
 731
 732 #if FOLLY_HAVE_INT128_T
 733 template Expected<__int128, ConversionCode> str_to_integral<__int128>(
 734     StringPiece* src) noexcept;
 735 template Expected<unsigned __int128, ConversionCode>
 736 str_to_integral<unsigned __int128>(StringPiece* src) noexcept;
 737 #endif
 738
 739 } // namespace detail
 740
 741 ConversionError makeConversionError(ConversionCode code, StringPiece input) {
 742   using namespace detail;
 743   static_assert(
 744       std::is_unsigned<std::underlying_type<ConversionCode>::type>::value,
 745       "ConversionCode should be unsigned");
 746   assert((std::size_t)code < kErrorStrings.size());
 747   const ErrorString& err = kErrorStrings[(std::size_t)code];
 748   if (code == ConversionCode::EMPTY_INPUT_STRING && input.empty()) {
 749     return {err.string, code};
 750   }
 751   std::string tmp(err.string);
 752   tmp.append(": ");
 753   if (err.quote) {
 754     tmp.append(1, '"');
 755   }
 756   if (input.size() > 0) {
 757     tmp.append(input.data(), input.size());
 758   }
 759   if (err.quote) {
 760     tmp.append(1, '"');
 761   }
 762   return {tmp, code};
 763 }
 764
 765 } // namespace folly