From: Marcus Holland-Moritz Date: Wed, 6 Jul 2016 10:56:58 +0000 (-0700) Subject: Refactor folly::to<> X-Git-Tag: 2016.07.26~75 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=ad2f872bf354b62dbde66937da096565ef9ed663;p=folly.git Refactor folly::to<> Summary: This is the main diff of the series. Its main purpose is to make the internals of folly::to<> propagate error codes instead of throwing exceptions. Along with this, it makes the following changes: - Move most of the string-to-int implementation out of the header file - Unify error/exception strings across conversion routines - Introduce a ConversionError class that derives from std::range_error - Capture an error code in ConversionError in addition to a string - Optimize tolower() calls in Conv.cpp - Introduce ConversionResult<>, which is used as the internal result wrapper - Get rid of all error checking macros There are quite a few benefits here. == Faster conversions == For a large set of conversions, the performance is unchanged. I've removed all benchmarks that were unchanged from the table below for simplicity. A few things stand out: - `follyAtoiMeasure` is consistently faster, sometimes by quite a large margin - The cost of throwing exceptions is significantly reduced, as throwing them further down on the call stack will reduce the amount of stack unwinding - String-to-boolean and string-to-float conversions are significantly faster when passing in a string representation (e.g. "off" or "infinity") thanks to the optimized tolower_ascii() call (column `New+Ascii` in the table) - Conversions between int and float are significantly faster and almost back at the performance of before the undefined behaviour fix - All string-to-(int|float|bool) conversions are consistently faster The columns in the table are as follows: Original: Original code before the undefined behaviour fix Fix UB: Code with the undefined behaviour fix; this impacts mostly the float <-> int conversions, but appears to have a small effect on some other benchmarks New: New code introduced by this diff, but without the tolower_ascii() optimization New+Ascii: New code, including the tolower_ascii() optimization =========================================================================================== Original Fix UB New New+Ascii folly/test/ConvBenchmark.cpp time/iter time/iter time/iter time/iter =========================================================================================== handwrittenAtoiMeasure(1) 3.95ns 3.95ns 3.95ns 3.95ns follyAtoiMeasure(1) 6.08ns 6.08ns 3.95ns 3.95ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(2) 5.47ns 5.47ns 5.47ns 5.47ns follyAtoiMeasure(2) 5.77ns 5.77ns 3.95ns 3.95ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(3) 6.08ns 6.08ns 6.08ns 6.08ns follyAtoiMeasure(3) 6.08ns 6.08ns 4.25ns 4.25ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(4) 6.99ns 6.99ns 6.99ns 6.99ns follyAtoiMeasure(4) 6.99ns 6.99ns 4.56ns 4.56ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(5) 7.90ns 8.20ns 7.90ns 7.90ns follyAtoiMeasure(5) 7.29ns 7.29ns 4.86ns 4.86ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(6) 8.81ns 9.42ns 8.81ns 8.81ns follyAtoiMeasure(6) 7.29ns 7.29ns 4.86ns 4.86ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(7) 9.72ns 10.63ns 9.72ns 9.72ns follyAtoiMeasure(7) 7.60ns 7.60ns 5.16ns 5.16ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(8) 10.63ns 11.85ns 10.63ns 10.63ns follyAtoiMeasure(8) 8.51ns 8.51ns 6.08ns 6.08ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(9) 11.54ns 13.07ns 11.54ns 11.54ns follyAtoiMeasure(9) 8.81ns 8.81ns 6.08ns 6.08ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(10) 12.46ns 14.28ns 12.46ns 12.46ns follyAtoiMeasure(10) 8.81ns 8.81ns 6.38ns 6.38ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(11) 13.37ns 15.50ns 13.37ns 13.37ns follyAtoiMeasure(11) 9.12ns 9.12ns 6.38ns 6.38ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(12) 14.28ns 16.71ns 14.28ns 14.28ns follyAtoiMeasure(12) 10.03ns 10.03ns 7.29ns 7.29ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(13) 15.19ns 17.92ns 15.19ns 15.19ns follyAtoiMeasure(13) 10.33ns 10.33ns 7.60ns 7.60ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(14) 16.10ns 19.14ns 16.10ns 16.10ns follyAtoiMeasure(14) 10.33ns 10.33ns 7.60ns 7.60ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(15) 17.01ns 20.36ns 17.01ns 17.01ns follyAtoiMeasure(15) 10.63ns 10.63ns 7.90ns 7.90ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(16) 17.92ns 21.57ns 17.92ns 17.92ns follyAtoiMeasure(16) 11.55ns 11.55ns 8.81ns 8.81ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(17) 18.84ns 22.79ns 18.84ns 18.84ns follyAtoiMeasure(17) 11.85ns 11.85ns 8.81ns 8.81ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(18) 19.75ns 24.00ns 19.75ns 19.75ns follyAtoiMeasure(18) 11.85ns 11.85ns 9.12ns 9.12ns ------------------------------------------------------------------------------------------- handwrittenAtoiMeasure(19) 20.66ns 25.22ns 20.66ns 20.66ns follyAtoiMeasure(19) 12.16ns 12.16ns 9.11ns 9.11ns ------------------------------------------------------------------------------------------- stringToBoolNumClassic 12.76ns 12.76ns 11.96ns 12.15ns stringToBoolNumClassicError 3.19us 3.18us 1.58us 1.58us stringToBoolStrClassic 17.92ns 17.92ns 15.50ns 7.60ns stringToBoolStrClassicError 3.21us 3.18us 1.57us 1.57us ------------------------------------------------------------------------------------------- stringToFloatNumClassic 32.96ns 32.81ns 32.10ns 31.12ns stringToFloatNumClassicError 2.73us 2.69us 1.65us 1.66us stringToFloatStrClassic 37.37ns 38.58ns 36.76ns 16.71ns stringToFloatStrClassicError 2.87us 2.87us 1.60us 1.59us stringToDoubleNumClassic 31.30ns 31.82ns 29.77ns 29.17ns stringToDoubleNumClassicError 2.69us 2.66us 1.65us 1.66us stringToDoubleStrClassic 37.67ns 37.67ns 35.84ns 16.71ns stringToDoubleStrClassicError 2.87us 2.86us 1.58us 1.58us ------------------------------------------------------------------------------------------- stringToCharSignedClassic 16.71ns 18.08ns 15.49ns 14.59ns stringToCharSignedClassicError 3.87us 3.82us 1.61us 1.61us stringToCharUnsignedClassic 15.49ns 15.19ns 12.46ns 12.66ns stringToCharUnsignedClassicError 2.73us 2.70us 1.62us 1.62us stringToIntSignedClassic 21.26ns 19.44ns 17.92ns 18.40ns stringToIntSignedClassicError 3.94us 3.89us 1.64us 1.64us stringToIntUnsignedClassic 17.93ns 18.53ns 15.50ns 15.50ns stringToIntUnsignedClassicError 2.72us 2.71us 1.62us 1.61us stringToLongLongSignedClassic 34.63ns 30.58ns 27.04ns 27.04ns stringToLongLongSignedClassicError 3.94us 3.90us 1.63us 1.63us stringToLongLongUnsignedClassic 51.04ns 47.96ns 46.44ns 46.68ns stringToLongLongUnsignedClassicError 2.73us 2.71us 1.61us 1.61us ------------------------------------------------------------------------------------------- ptrPairToCharSignedClassic 5.16ns 5.16ns 3.34ns 3.65ns ptrPairToCharSignedClassicError 3.56us 3.54us 1.61us 1.61us ptrPairToCharUnsignedClassic 2.43ns 2.43ns 2.13ns 2.13ns ptrPairToCharUnsignedClassicError 2.63us 2.63us 1.61us 1.61us ptrPairToIntSignedClassic 6.99ns 6.99ns 5.16ns 5.16ns ptrPairToIntSignedClassicError 4.08us 4.06us 1.61us 1.61us ptrPairToIntUnsignedClassic 4.25ns 4.56ns 3.34ns 3.34ns ptrPairToIntUnsignedClassicError 2.70us 2.70us 1.60us 1.60us ptrPairToLongLongSignedClassic 12.16ns 12.16ns 9.72ns 9.72ns ptrPairToLongLongSignedClassicError 4.06us 4.06us 1.61us 1.61us ptrPairToLongLongUnsignedClassic 29.13ns 29.13ns 27.61ns 27.61ns ptrPairToLongLongUnsignedClassicError 2.71us 2.72us 1.63us 1.64us ------------------------------------------------------------------------------------------- intToCharSignedClassic 405.02ps 506.35ps 405.02ps 405.02ps intToCharSignedClassicError 2.10us 2.09us 1.63us 1.64us intToCharUnsignedClassic 303.79ps 303.78ps 303.77ps 303.77ps intToCharUnsignedClassicError 2.10us 2.09us 1.63us 1.64us intToIntSignedClassic 405.02ps 405.02ps 405.01ps 405.01ps intToIntSignedClassicError 1.99us 1.98us 1.72us 1.72us intToIntUnsignedClassic 405.03ps 405.03ps 379.71ps 379.71ps intToIntUnsignedClassicError 2.09us 2.09us 1.63us 1.63us ------------------------------------------------------------------------------------------- intToFloatClassic 545.11ps 3.34ns 1.23ns 1.23ns intToFloatClassicError 1.67us 2.37us 1.73us 1.72us ------------------------------------------------------------------------------------------- floatToFloatClassic 759.47ps 759.47ps 759.45ps 759.45ps floatToFloatClassicError 6.45us 6.44us 1.77us 1.77us ------------------------------------------------------------------------------------------- floatToIntClassic 637.82ps 2.89ns 1.50ns 1.50ns floatToIntClassicError 1.92us 2.61us 1.82us 1.83us =========================================================================================== == Improved build times == I've checked this with gcc 4.9.3, and compile times for both ConvTest and ConvBenchmark are reduced by roughly 10%: ==================================== original new code ------------------------------------ ConvTest.o 14.788s 13.361s ConvBenchmark.o 16.148s 14.578s ==================================== == Smaller binary size == Again, checked with gcc 4.9.3, stripped binaries are slightly smaller with the new code: ==================================== original new code ------------------------------------ conv_test 761704 749384 conv_benchmark 539632 510928 ==================================== == Ability to add new non-throwing interfaces == This change sticks to the original API that will throw an exception in case of an error. A subsequent diff will introduce non-throwing interfaces with a minimum of additional code. Reviewed By: ericniebler Differential Revision: D3433856 fbshipit-source-id: 9bc976ebc181fe2f172ae47c78edf407e9ee7bb0 --- diff --git a/folly/Conv.cpp b/folly/Conv.cpp index c78ed498..199819e3 100644 --- a/folly/Conv.cpp +++ b/folly/Conv.cpp @@ -13,14 +13,37 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#define FOLLY_CONV_INTERNAL #include +#include namespace folly { namespace detail { namespace { +/** + * Finds the first non-digit in a string. The number of digits + * searched depends on the precision of the Tgt integral. Assumes the + * string starts with NO whitespace and NO sign. + * + * The semantics of the routine is: + * for (;; ++b) { + * if (b >= e || !isdigit(*b)) return b; + * } + * + * Complete unrolling marks bottom-line (i.e. entire conversion) + * improvements of 20%. + */ +inline const char* findFirstNonDigit(const char* b, const char* e) { + for (; b < e; ++b) { + auto const c = static_cast(*b) - '0'; + if (c >= 10) { + break; + } + } + return b; +} + // Maximum value of number when represented as a string template struct MaxString { @@ -178,6 +201,39 @@ FOLLY_ALIGNED(16) constexpr uint16_t shift1000[] = { OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240 OOR, OOR, OOR, OOR, OOR, OOR // 250 }; + +struct ErrorString { + const char* string; + bool quote; +}; + +// Keep this in sync with ConversionError::Code in Conv.h +constexpr const std::array + kErrorStrings{{ + {"Success", true}, + {"Empty input string", true}, + {"No digits found in input string", true}, + {"Integer overflow when parsing bool (must be 0 or 1)", true}, + {"Invalid value for bool", true}, + {"Non-digit character found", true}, + {"Invalid leading character", true}, + {"Overflow during conversion", true}, + {"Negative overflow during conversion", true}, + {"Unable to convert string to floating point value", true}, + {"Non-whitespace character found after end of conversion", true}, + {"Overflow during arithmetic conversion", false}, + {"Negative overflow during arithmetic conversion", false}, + {"Loss of precision during arithmetic conversion", false}, + }}; + +// Check if ASCII is really ASCII +using IsAscii = std:: + integral_constant; + +// The code in this file that uses tolower() really only cares about +// 7-bit ASCII characters, so we can take a nice shortcut here. +inline char tolower_ascii(char in) { + return IsAscii::value ? in | 0x20 : std::tolower(in); } inline bool bool_str_cmp(const char** b, size_t len, const char* value) { @@ -186,7 +242,7 @@ inline bool bool_str_cmp(const char** b, size_t len, const char* value) { const char* e = *b + len; const char* v = value; while (*v != '\0') { - if (p == e || tolower(*p) != *v) { // value is already lowercase + if (p == e || tolower_ascii(*p) != *v) { // value is already lowercase return false; } ++p; @@ -197,12 +253,40 @@ inline bool bool_str_cmp(const char** b, size_t len, const char* value) { return true; } -bool str_to_bool(StringPiece* src) { +} // anonymous namespace + +ConversionError makeConversionError( + ConversionError::Code code, + const char* input, + size_t inputLen) { + assert(code >= 0 && code < kErrorStrings.size()); + const ErrorString& err = kErrorStrings[code]; + if (code == ConversionError::EMPTY_INPUT_STRING && inputLen == 0) { + return ConversionError(err.string, code); + } + std::string tmp(err.string); + tmp.append(": "); + if (err.quote) { + tmp.append(1, '"'); + } + if (input && inputLen > 0) { + tmp.append(input, inputLen); + } + if (err.quote) { + tmp.append(1, '"'); + } + return ConversionError(tmp, code); +} + +ConversionResult str_to_bool(StringPiece* src) { auto b = src->begin(), e = src->end(); for (;; ++b) { - FOLLY_RANGE_CHECK_STRINGPIECE( - b < e, "No non-whitespace characters found in input string", *src); - if (!isspace(*b)) break; + if (b >= e) { + return ConversionResult(ConversionError::EMPTY_INPUT_STRING); + } + if (!std::isspace(*b)) { + break; + } } bool result; @@ -212,9 +296,9 @@ bool str_to_bool(StringPiece* src) { case '1': { result = false; for (; b < e && isdigit(*b); ++b) { - FOLLY_RANGE_CHECK_STRINGPIECE( - !result && (*b == '0' || *b == '1'), - "Integer overflow when parsing bool: must be 0 or 1", *src); + if (result || (*b != '0' && *b != '1')) { + return ConversionResult(ConversionError::BOOL_OVERFLOW); + } result = (*b == '1'); } break; @@ -254,24 +338,24 @@ bool str_to_bool(StringPiece* src) { } else if (bool_str_cmp(&b, len, "off")) { result = false; } else { - FOLLY_RANGE_CHECK_STRINGPIECE(false, "Invalid value for bool", *src); + return ConversionResult(ConversionError::BOOL_INVALID_VALUE); } break; default: - FOLLY_RANGE_CHECK_STRINGPIECE(false, "Invalid value for bool", *src); + return ConversionResult(ConversionError::BOOL_INVALID_VALUE); } src->assign(b, e); - return result; + + return ConversionResult(result); } -namespace { /** * StringPiece to double, with progress information. Alters the * StringPiece parameter to munch the already-parsed characters. */ template -Tgt str_to_floating(StringPiece* src) { +ConversionResult str_to_floating(StringPiece* src) { using namespace double_conversion; static StringToDoubleConverter conv(StringToDoubleConverter::ALLOW_TRAILING_JUNK @@ -281,8 +365,9 @@ Tgt str_to_floating(StringPiece* src) { std::numeric_limits::quiet_NaN(), nullptr, nullptr); - FOLLY_RANGE_CHECK_STRINGPIECE(!src->empty(), - "No digits found in input string", *src); + if (src->empty()) { + return ConversionResult(ConversionError::EMPTY_INPUT_STRING); + } int length; auto result = conv.StringToDouble(src->data(), @@ -290,125 +375,195 @@ Tgt str_to_floating(StringPiece* src) { &length); // processed char count if (!std::isnan(result)) { + // If we get here with length = 0, the input string is empty. + // If we get here with result = 0.0, it's either because the string + // contained only whitespace, or because we had an actual zero value + // (with potential trailing junk). If it was only whitespace, we + // want to raise an error; length will point past the last character + // that was processed, so we need to check if that character was + // whitespace or not. + if (length == 0 || (result == 0.0 && std::isspace((*src)[length - 1]))) { + return ConversionResult(ConversionError::EMPTY_INPUT_STRING); + } src->advance(length); - return result; + return ConversionResult(result); } - for (;; src->advance(1)) { - if (src->empty()) { - throw std::range_error("Unable to convert an empty string" - " to a floating point value."); - } - if (!isspace(src->front())) { + auto* e = src->end(); + auto* b = + std::find_if_not(src->begin(), e, [](char c) { return std::isspace(c); }); + + // There must be non-whitespace, otherwise we would have caught this above + assert(b < e); + size_t size = e - b; + + bool negative = false; + if (*b == '-') { + negative = true; + ++b; + --size; + } + + result = 0.0; + + switch (tolower_ascii(*b)) { + case 'i': + if (size >= 3 && tolower_ascii(b[1]) == 'n' && + tolower_ascii(b[2]) == 'f') { + if (size >= 8 && tolower_ascii(b[3]) == 'i' && + tolower_ascii(b[4]) == 'n' && tolower_ascii(b[5]) == 'i' && + tolower_ascii(b[6]) == 't' && tolower_ascii(b[7]) == 'y') { + b += 8; + } else { + b += 3; + } + result = std::numeric_limits::infinity(); + } + break; + + case 'n': + if (size >= 3 && tolower_ascii(b[1]) == 'a' && + tolower_ascii(b[2]) == 'n') { + b += 3; + result = std::numeric_limits::quiet_NaN(); + } + break; + + default: break; - } } - // Was that "inf[inity]"? - if (src->size() >= 3 && toupper((*src)[0]) == 'I' - && toupper((*src)[1]) == 'N' && toupper((*src)[2]) == 'F') { - if (src->size() >= 8 && - toupper((*src)[3]) == 'I' && - toupper((*src)[4]) == 'N' && - toupper((*src)[5]) == 'I' && - toupper((*src)[6]) == 'T' && - toupper((*src)[7]) == 'Y') { - src->advance(8); - } else { - src->advance(3); - } - return std::numeric_limits::infinity(); - } - - // Was that "-inf[inity]"? - if (src->size() >= 4 && toupper((*src)[0]) == '-' - && toupper((*src)[1]) == 'I' && toupper((*src)[2]) == 'N' - && toupper((*src)[3]) == 'F') { - if (src->size() >= 9 && - toupper((*src)[4]) == 'I' && - toupper((*src)[5]) == 'N' && - toupper((*src)[6]) == 'I' && - toupper((*src)[7]) == 'T' && - toupper((*src)[8]) == 'Y') { - src->advance(9); - } else { - src->advance(4); + if (result == 0.0) { + // All bets are off + return ConversionResult(ConversionError::STRING_TO_FLOAT_ERROR); + } + + if (negative) { + result = -result; + } + + src->assign(b, e); + + return ConversionResult(result); +} + +template ConversionResult str_to_floating(StringPiece* src); +template ConversionResult str_to_floating(StringPiece* src); + +/** + * This class takes care of additional processing needed for signed values, + * like leading sign character and overflow checks. + */ +template ::value> +class SignedValueHandler; + +template +class SignedValueHandler { + public: + ConversionError::Code init(const char*& b) { + negative_ = false; + if (!std::isdigit(*b)) { + if (*b == '-') { + negative_ = true; + } else if (UNLIKELY(*b != '+')) { + return ConversionError::INVALID_LEADING_CHAR; + } + ++b; } - return -std::numeric_limits::infinity(); + return ConversionError::SUCCESS; } - // "nan"? - if (src->size() >= 3 && toupper((*src)[0]) == 'N' - && toupper((*src)[1]) == 'A' && toupper((*src)[2]) == 'N') { - src->advance(3); - return std::numeric_limits::quiet_NaN(); + ConversionError::Code overflow() { + return negative_ ? ConversionError::NEGATIVE_OVERFLOW + : ConversionError::POSITIVE_OVERFLOW; } - // "-nan"? - if (src->size() >= 4 && - toupper((*src)[0]) == '-' && - toupper((*src)[1]) == 'N' && - toupper((*src)[2]) == 'A' && - toupper((*src)[3]) == 'N') { - src->advance(4); - return -std::numeric_limits::quiet_NaN(); + template + ConversionResult finalize(U value) { + T rv; + if (negative_) { + rv = -value; + if (UNLIKELY(rv > 0)) { + return ConversionResult(ConversionError::NEGATIVE_OVERFLOW); + } + } else { + rv = value; + if (UNLIKELY(rv < 0)) { + return ConversionResult(ConversionError::POSITIVE_OVERFLOW); + } + } + return ConversionResult(rv); } - // All bets are off - throw std::range_error("Unable to convert \"" + src->toString() - + "\" to a floating point value."); -} + private: + bool negative_; +}; -} +// For unsigned types, we don't need any extra processing +template +class SignedValueHandler { + public: + ConversionError::Code init(const char*&) { + return ConversionError::SUCCESS; + } -float str_to_float(StringPiece* src) { - return str_to_floating(src); -} + ConversionError::Code overflow() { + return ConversionError::POSITIVE_OVERFLOW; + } -double str_to_double(StringPiece* src) { - return str_to_floating(src); -} + ConversionResult finalize(T value) { + return ConversionResult(value); + } +}; /** - * String represented as a pair of pointers to char to unsigned + * String represented as a pair of pointers to char to signed/unsigned * integrals. Assumes NO whitespace before or after, and also that the - * string is composed entirely of digits. Tgt must be unsigned, and no - * sign is allowed in the string (even it's '+'). String may be empty, - * in which case digits_to throws. + * string is composed entirely of digits (and an optional sign only for + * signed types). String may be empty, in which case digits_to returns + * an appropriate error. */ template -Tgt digits_to(const char* b, const char* e) { - - static_assert(!std::is_signed::value, "Unsigned type expected"); +inline ConversionResult digits_to(const char* b, const char* const e) { + using UT = typename std::make_unsigned::type; assert(b <= e); - const size_t size = e - b; + SignedValueHandler sgn; + + auto err = sgn.init(b); + if (UNLIKELY(err != ConversionError::SUCCESS)) { + return ConversionResult(err); + } + + size_t size = e - b; /* Although the string is entirely made of digits, we still need to * check for overflow. */ - if (size >= std::numeric_limits::digits10 + 1) { - // Leading zeros? If so, recurse to keep things simple + if (size > std::numeric_limits::digits10) { + // Leading zeros? if (b < e && *b == '0') { for (++b;; ++b) { - if (b == e) - return 0; // just zeros, e.g. "0000" - if (*b != '0') - return digits_to(b, e); + if (b == e) { + return ConversionResult(Tgt(0)); // just zeros, e.g. "0000" + } + if (*b != '0') { + size = e - b; + break; + } } } - FOLLY_RANGE_CHECK_BEGIN_END( - size == std::numeric_limits::digits10 + 1 && - strncmp(b, detail::MaxString::value, size) <= 0, - "Numeric overflow upon conversion", - b, - e); + if (size > std::numeric_limits::digits10 && + (size != std::numeric_limits::digits10 + 1 || + strncmp(b, MaxString::value, size) > 0)) { + return ConversionResult(sgn.overflow()); + } } // Here we know that the number won't overflow when // converted. Proceed without checks. - Tgt result = 0; + UT result = 0; for (; e - b >= 4; b += 4) { result *= 10000; @@ -417,7 +572,9 @@ Tgt digits_to(const char* b, const char* e) { const int32_t r2 = shift10[static_cast(b[2])]; const int32_t r3 = shift1[static_cast(b[3])]; const auto sum = r0 + r1 + r2 + r3; - assert(sum < OOR && "Assumption: string only has digits"); + if (sum >= OOR) { + goto outOfRange; + } result += sum; } @@ -427,38 +584,161 @@ Tgt digits_to(const char* b, const char* e) { const int32_t r1 = shift10[static_cast(b[1])]; const int32_t r2 = shift1[static_cast(b[2])]; const auto sum = r0 + r1 + r2; - assert(sum < OOR && "Assumption: string only has digits"); - return result * 1000 + sum; + if (sum >= OOR) { + goto outOfRange; + } + result = 1000 * result + sum; + break; } case 2: { const int32_t r0 = shift10[static_cast(b[0])]; const int32_t r1 = shift1[static_cast(b[1])]; const auto sum = r0 + r1; - assert(sum < OOR && "Assumption: string only has digits"); - return result * 100 + sum; + if (sum >= OOR) { + goto outOfRange; + } + result = 100 * result + sum; + break; } case 1: { const int32_t sum = shift1[static_cast(b[0])]; - assert(sum < OOR && "Assumption: string only has digits"); - return result * 10 + sum; + if (sum >= OOR) { + goto outOfRange; + } + result = 10 * result + sum; + break; } + default: + assert(b == e); + if (size == 0) { + return ConversionResult(ConversionError::NO_DIGITS); + } + break; } - assert(b == e); - FOLLY_RANGE_CHECK_BEGIN_END( - size > 0, "Found no digits to convert in input", b, e); - return result; + return sgn.finalize(result); + +outOfRange: + return ConversionResult(ConversionError::NON_DIGIT_CHAR); } -template unsigned char digits_to(const char* b, const char* e); -template unsigned short digits_to(const char* b, const char* e); -template unsigned int digits_to(const char* b, const char* e); -template unsigned long digits_to(const char* b, const char* e); -template unsigned long long digits_to(const char* b, - const char* e); +template ConversionResult digits_to(const char*, const char*); +template ConversionResult digits_to( + const char*, + const char*); +template ConversionResult digits_to( + const char*, + const char*); + +template ConversionResult digits_to(const char*, const char*); +template ConversionResult digits_to( + const char*, + const char*); + +template ConversionResult digits_to(const char*, const char*); +template ConversionResult digits_to( + const char*, + const char*); + +template ConversionResult digits_to(const char*, const char*); +template ConversionResult digits_to( + const char*, + const char*); + +template ConversionResult digits_to( + const char*, + const char*); +template ConversionResult digits_to( + const char*, + const char*); + +#if FOLLY_HAVE_INT128_T +template ConversionResult<__int128> digits_to<__int128>( + const char*, + const char*); +template ConversionResult digits_to( + const char*, + const char*); +#endif + +/** + * StringPiece to integrals, with progress information. Alters the + * StringPiece parameter to munch the already-parsed characters. + */ +template +ConversionResult str_to_integral(StringPiece* src) { + using UT = typename std::make_unsigned::type; + + auto b = src->data(), past = src->data() + src->size(); + + for (;; ++b) { + if (UNLIKELY(b >= past)) { + return ConversionResult(ConversionError::EMPTY_INPUT_STRING); + } + if (!std::isspace(*b)) { + break; + } + } + + SignedValueHandler sgn; + auto err = sgn.init(b); + + if (UNLIKELY(err != ConversionError::SUCCESS)) { + return ConversionResult(err); + } + if (std::is_signed::value && UNLIKELY(b >= past)) { + return ConversionResult(ConversionError::NO_DIGITS); + } + if (UNLIKELY(!isdigit(*b))) { + return ConversionResult(ConversionError::NON_DIGIT_CHAR); + } + + auto m = findFirstNonDigit(b + 1, past); + + auto tmp = digits_to(b, m); + + if (UNLIKELY(!tmp.success())) { + return ConversionResult( + tmp.error == ConversionError::POSITIVE_OVERFLOW ? sgn.overflow() + : tmp.error); + } + + auto res = sgn.finalize(tmp.value); + + if (res.success()) { + src->advance(m - src->data()); + } + + return res; +} + +template ConversionResult str_to_integral(StringPiece* src); +template ConversionResult str_to_integral( + StringPiece* src); +template ConversionResult str_to_integral( + StringPiece* src); + +template ConversionResult str_to_integral(StringPiece* src); +template ConversionResult str_to_integral( + StringPiece* src); + +template ConversionResult str_to_integral(StringPiece* src); +template ConversionResult str_to_integral( + StringPiece* src); + +template ConversionResult str_to_integral(StringPiece* src); +template ConversionResult str_to_integral( + StringPiece* src); + +template ConversionResult str_to_integral( + StringPiece* src); +template ConversionResult +str_to_integral(StringPiece* src); + #if FOLLY_HAVE_INT128_T -template unsigned __int128 digits_to(const char* b, - const char* e); +template ConversionResult<__int128> str_to_integral<__int128>(StringPiece* src); +template ConversionResult str_to_integral( + StringPiece* src); #endif } // namespace detail diff --git a/folly/Conv.h b/folly/Conv.h index 871853d3..d6553373 100644 --- a/folly/Conv.h +++ b/folly/Conv.h @@ -24,6 +24,7 @@ #pragma once #include +#include #include #include #include @@ -37,124 +38,119 @@ #include #include // V8 JavaScript implementation +#include #include #include -#include #include #include -#define FOLLY_RANGE_CHECK_STRINGIZE(x) #x -#define FOLLY_RANGE_CHECK_STRINGIZE2(x) FOLLY_RANGE_CHECK_STRINGIZE(x) +namespace folly { -// Android doesn't support std::to_string so just use a placeholder there. -#ifdef __ANDROID__ -#define FOLLY_RANGE_CHECK_TO_STRING(x) std::string("N/A") -#else -#define FOLLY_RANGE_CHECK_TO_STRING(x) std::to_string(x) -#endif +class ConversionError : public std::range_error { + public: + // Keep this in sync with kErrorStrings in Conv.cpp + enum Code { + SUCCESS, + EMPTY_INPUT_STRING, + NO_DIGITS, + BOOL_OVERFLOW, + BOOL_INVALID_VALUE, + NON_DIGIT_CHAR, + INVALID_LEADING_CHAR, + POSITIVE_OVERFLOW, + NEGATIVE_OVERFLOW, + STRING_TO_FLOAT_ERROR, + NON_WHITESPACE_AFTER_END, + ARITH_POSITIVE_OVERFLOW, + ARITH_NEGATIVE_OVERFLOW, + ARITH_LOSS_OF_PRECISION, + NUM_ERROR_CODES, // has to be the last entry + }; -#define FOLLY_RANGE_CHECK(condition, message, src) \ - ((condition) ? (void)0 : throw std::range_error( \ - (std::string(__FILE__ "(" FOLLY_RANGE_CHECK_STRINGIZE2(__LINE__) "): ") \ - + (message) + ": '" + (src) + "'").c_str())) + ConversionError(const std::string& str, Code code) + : std::range_error(str), code_(code) {} -#define FOLLY_RANGE_CHECK_BEGIN_END(condition, message, b, e) \ - FOLLY_RANGE_CHECK(condition, message, std::string((b), (e) - (b))) + ConversionError(const char* str, Code code) + : std::range_error(str), code_(code) {} -#define FOLLY_RANGE_CHECK_STRINGPIECE(condition, message, sp) \ - FOLLY_RANGE_CHECK(condition, message, std::string((sp).data(), (sp).size())) + Code errorCode() const { return code_; } -namespace folly { + private: + Code code_; +}; -/** - * The identity conversion function. - * to(T) returns itself for all types T. - */ -template -typename std::enable_if::value, Tgt>::type -to(const Src & value) { - return value; +namespace detail { + +ConversionError makeConversionError( + ConversionError::Code code, + const char* input, + size_t inputLen); + +inline ConversionError makeConversionError( + ConversionError::Code code, + const std::string& str) { + return makeConversionError(code, str.data(), str.size()); } -template -typename std::enable_if::value, Tgt>::type -to(Src && value) { - return std::forward(value); +inline ConversionError makeConversionError( + ConversionError::Code code, + StringPiece sp) { + return makeConversionError(code, sp.data(), sp.size()); } -/******************************************************************************* - * Arithmetic to boolean - ******************************************************************************/ +/** + * Enforce that the suffix following a number is made up only of whitespace. + */ +inline ConversionError::Code enforceWhitespaceErr(StringPiece sp) { + for (auto c : sp) { + if (!std::isspace(c)) { + return ConversionError::NON_WHITESPACE_AFTER_END; + } + } + return ConversionError::SUCCESS; +} /** - * Unchecked conversion from arithmetic to boolean. This is different from the - * other arithmetic conversions because we use the C convention of treating any - * non-zero value as true, instead of range checking. + * Keep this implementation around for prettyToDouble(). */ -template -typename std::enable_if< - std::is_arithmetic::value && !std::is_same::value && - std::is_same::value, - Tgt>::type -to(const Src& value) { - return value != Src(); +inline void enforceWhitespace(StringPiece sp) { + auto err = enforceWhitespaceErr(sp); + if (err != ConversionError::SUCCESS) { + throw detail::makeConversionError(err, sp); + } } -/******************************************************************************* - * Integral to integral - ******************************************************************************/ +/** + * A simple std::pair-like wrapper to wrap both a value and an error + */ +template +struct ConversionResult { + explicit ConversionResult(T v) : value(v) {} + explicit ConversionResult(ConversionError::Code e) : error(e) {} + + bool success() const { + return error == ConversionError::SUCCESS; + } + + T value; + ConversionError::Code error{ConversionError::SUCCESS}; +}; +} /** - * Checked conversion from integral to integral. The checks are only - * performed when meaningful, e.g. conversion from int to long goes - * unchecked. + * The identity conversion function. + * to(T) returns itself for all types T. */ template -typename std::enable_if< - std::is_integral::value - && !std::is_same::value - && !std::is_same::value - && std::is_integral::value, - Tgt>::type +typename std::enable_if::value, Tgt>::type to(const Src & value) { - /* static */ if (std::numeric_limits::max() - < std::numeric_limits::max()) { - FOLLY_RANGE_CHECK( - (!greater_than::max()>(value)), - "Overflow", - FOLLY_RANGE_CHECK_TO_STRING(value)); - } - /* static */ if (std::is_signed::value && - (!std::is_signed::value || sizeof(Src) > sizeof(Tgt))) { - FOLLY_RANGE_CHECK( - (!less_than::min()>(value)), - "Negative overflow", - FOLLY_RANGE_CHECK_TO_STRING(value)); - } - return static_cast(value); + return value; } -/******************************************************************************* - * Floating point to floating point - ******************************************************************************/ - template -typename std::enable_if< - std::is_floating_point::value - && std::is_floating_point::value - && !std::is_same::value, - Tgt>::type -to(const Src & value) { - /* static */ if (std::numeric_limits::max() < - std::numeric_limits::max()) { - FOLLY_RANGE_CHECK(value <= std::numeric_limits::max(), - "Overflow", - FOLLY_RANGE_CHECK_TO_STRING(value)); - FOLLY_RANGE_CHECK(value >= -std::numeric_limits::max(), - "Negative overflow", - FOLLY_RANGE_CHECK_TO_STRING(value)); - } - return boost::implicit_cast(value); +typename std::enable_if::value, Tgt>::type +to(Src && value) { + return std::forward(value); } /******************************************************************************* @@ -961,205 +957,245 @@ toDelim(const Delim& delim, const Ts&... vs) { namespace detail { -/** - * Finds the first non-digit in a string. The number of digits - * searched depends on the precision of the Tgt integral. Assumes the - * string starts with NO whitespace and NO sign. - * - * The semantics of the routine is: - * for (;; ++b) { - * if (b >= e || !isdigit(*b)) return b; - * } - * - * Complete unrolling marks bottom-line (i.e. entire conversion) - * improvements of 20%. - */ - template - const char* findFirstNonDigit(const char* b, const char* e) { - for (; b < e; ++b) { - auto const c = static_cast(*b) - '0'; - if (c >= 10) break; - } - return b; - } +ConversionResult str_to_bool(StringPiece* src); +template +ConversionResult str_to_floating(StringPiece* src); - bool str_to_bool(StringPiece* src); - float str_to_float(StringPiece* src); - double str_to_double(StringPiece* src); +extern template ConversionResult str_to_floating( + StringPiece* src); +extern template ConversionResult str_to_floating( + StringPiece* src); - template - Tgt digits_to(const char* b, const char* e); +template +ConversionResult digits_to(const char* b, const char* e); + +extern template ConversionResult digits_to( + const char*, + const char*); +extern template ConversionResult digits_to( + const char*, + const char*); +extern template ConversionResult digits_to( + const char*, + const char*); + +extern template ConversionResult digits_to( + const char*, + const char*); +extern template ConversionResult digits_to( + const char*, + const char*); + +extern template ConversionResult digits_to(const char*, const char*); +extern template ConversionResult digits_to( + const char*, + const char*); + +extern template ConversionResult digits_to( + const char*, + const char*); +extern template ConversionResult digits_to( + const char*, + const char*); + +extern template ConversionResult digits_to( + const char*, + const char*); +extern template ConversionResult +digits_to(const char*, const char*); - extern template unsigned char digits_to(const char* b, - const char* e); - extern template unsigned short digits_to(const char* b, - const char* e); - extern template unsigned int digits_to(const char* b, - const char* e); - extern template unsigned long digits_to(const char* b, - const char* e); - extern template unsigned long long digits_to( - const char* b, const char* e); #if FOLLY_HAVE_INT128_T - extern template unsigned __int128 digits_to(const char* b, - const char* e); +extern template ConversionResult<__int128> digits_to<__int128>( + const char*, + const char*); +extern template ConversionResult +digits_to(const char*, const char*); #endif -} // namespace detail +template +ConversionResult str_to_integral(StringPiece* src); -/** - * String represented as a pair of pointers to char to unsigned - * integrals. Assumes NO whitespace before or after. - */ -template -typename std::enable_if< - std::is_integral::value && !std::is_signed::value - && !std::is_same::type, bool>::value, - Tgt>::type -to(const char * b, const char * e) { - return detail::digits_to(b, e); +extern template ConversionResult str_to_integral(StringPiece* src); +extern template ConversionResult str_to_integral( + StringPiece* src); +extern template ConversionResult str_to_integral( + StringPiece* src); + +extern template ConversionResult str_to_integral( + StringPiece* src); +extern template ConversionResult +str_to_integral(StringPiece* src); + +extern template ConversionResult str_to_integral(StringPiece* src); +extern template ConversionResult str_to_integral( + StringPiece* src); + +extern template ConversionResult str_to_integral(StringPiece* src); +extern template ConversionResult str_to_integral( + StringPiece* src); + +extern template ConversionResult str_to_integral( + StringPiece* src); +extern template ConversionResult +str_to_integral(StringPiece* src); + +#if FOLLY_HAVE_INT128_T +extern template ConversionResult<__int128> str_to_integral<__int128>( + StringPiece* src); +extern template ConversionResult +str_to_integral(StringPiece* src); +#endif + +template +typename std::enable_if::value, ConversionResult>::type +convertTo(StringPiece* src) { + return str_to_bool(src); } -/** - * String represented as a pair of pointers to char to signed - * integrals. Assumes NO whitespace before or after. Allows an - * optional leading sign. - */ -template +template typename std::enable_if< - std::is_integral::value && std::is_signed::value, - Tgt>::type -to(const char * b, const char * e) { - FOLLY_RANGE_CHECK(b < e, "Empty input string in conversion to integral", - to("b: ", intptr_t(b), " e: ", intptr_t(e))); - if (!isdigit(*b)) { - if (*b == '-') { - Tgt result = -to::type>(b + 1, e); - FOLLY_RANGE_CHECK_BEGIN_END(result <= 0, "Negative overflow.", b, e); - return result; - } - FOLLY_RANGE_CHECK_BEGIN_END(*b == '+', "Invalid lead character", b, e); - ++b; - } - Tgt result = to::type>(b, e); - FOLLY_RANGE_CHECK_BEGIN_END(result >= 0, "Overflow", b, e); - return result; + std::is_floating_point::value, ConversionResult>::type +convertTo(StringPiece* src) { + return str_to_floating(src); } -namespace detail { -/** - * StringPiece to integrals, with progress information. Alters the - * StringPiece parameter to munch the already-parsed characters. - */ -template -Tgt str_to_integral(StringPiece* src) { - auto b = src->data(), past = src->data() + src->size(); - for (;; ++b) { - FOLLY_RANGE_CHECK_STRINGPIECE(b < past, - "No digits found in input string", *src); - if (!isspace(*b)) break; - } +template +typename std::enable_if< + std::is_integral::value && !std::is_same::value, + ConversionResult>::type +convertTo(StringPiece* src) { + return str_to_integral(src); +} - auto m = b; - - // First digit is customized because we test for sign - bool negative = false; - /* static */ if (std::is_signed::value) { - if (!isdigit(*m)) { - if (*m == '-') { - negative = true; - } else { - FOLLY_RANGE_CHECK_STRINGPIECE(*m == '+', "Invalid leading character in " - "conversion to integral", *src); - } - ++b; - ++m; - } - } - FOLLY_RANGE_CHECK_STRINGPIECE(m < past, "No digits found in input string", - *src); - FOLLY_RANGE_CHECK_STRINGPIECE(isdigit(*m), "Non-digit character found", *src); - m = detail::findFirstNonDigit(m + 1, past); +template +struct WrapperInfo { using type = T; }; - Tgt result; - /* static */ if (!std::is_signed::value) { - result = detail::digits_to::type>(b, m); - } else { - auto t = detail::digits_to::type>(b, m); - if (negative) { - result = -t; - FOLLY_RANGE_CHECK_STRINGPIECE(is_non_positive(result), - "Negative overflow", *src); - } else { - result = t; - FOLLY_RANGE_CHECK_STRINGPIECE(is_non_negative(result), "Overflow", *src); - } +template +typename std::enable_if< + std::is_same::type, T>::value, T>::type +inline wrap(ConversionResult::type> res, Gen&& gen) { + if (LIKELY(res.success())) { + return res.value; } - src->advance(m - src->data()); - return result; + throw detail::makeConversionError(res.error, gen()); } +} // namespace detail + /** - * Enforce that the suffix following a number is made up only of whitespace. + * String represented as a pair of pointers to char to unsigned + * integrals. Assumes NO whitespace before or after. */ -inline void enforceWhitespace(StringPiece sp) { - for (char ch : sp) { - FOLLY_RANGE_CHECK_STRINGPIECE( - isspace(ch), to("Non-whitespace: ", ch), sp); - } +template +typename std::enable_if< + std::is_integral::type>::value && + !std::is_same::type, bool>::value, + Tgt>::type +to(const char* b, const char* e) { + auto res = detail::digits_to::type>(b, e); + return detail::wrap(res, [&] { return StringPiece(b, e); }); } /******************************************************************************* - * Conversions from string types to floating-point types. + * Conversions from string types to arithmetic types. ******************************************************************************/ - -} // namespace detail - -/** - * StringPiece to bool, with progress information. Alters the - * StringPiece parameter to munch the already-parsed characters. - */ -inline void parseTo(StringPiece* src, bool& out) { - out = detail::str_to_bool(src); -} - /** * Parsing strings to numeric types. These routines differ from * parseTo(str, numeric) routines in that they take a POINTER TO a StringPiece * and alter that StringPiece to reflect progress information. */ -template +template typename std::enable_if< - std::is_integral::type>::value>::type + std::is_arithmetic::type>::value>::type parseTo(StringPiece* src, Tgt& out) { - out = detail::str_to_integral(src); -} - -inline void parseTo(StringPiece* src, float& out) { - out = detail::str_to_float(src); -} - -inline void parseTo(StringPiece* src, double& out) { - out = detail::str_to_double(src); + auto res = detail::convertTo::type>(src); + out = detail::wrap(res, [&] { return *src; }); } -template +template typename std::enable_if< - std::is_floating_point::value || - std::is_integral::type>::value>::type + std::is_arithmetic::type>::value>::type parseTo(StringPiece src, Tgt& out) { - parseTo(&src, out); - detail::enforceWhitespace(src); + auto res = detail::convertTo::type>(&src); + if (LIKELY(res.success())) { + res.error = detail::enforceWhitespaceErr(src); + } + out = detail::wrap(res, [&] { return src; }); } /******************************************************************************* - * Integral to floating point and back + * Integral / Floating Point to integral / Floating Point ******************************************************************************/ +/** + * Unchecked conversion from arithmetic to boolean. This is different from the + * other arithmetic conversions because we use the C convention of treating any + * non-zero value as true, instead of range checking. + */ +template +typename std::enable_if< + std::is_arithmetic::value && !std::is_same::value && + std::is_same::value, + Tgt>::type +to(const Src& value) { + return value != Src(); +} + namespace detail { +/** + * Checked conversion from integral to integral. The checks are only + * performed when meaningful, e.g. conversion from int to long goes + * unchecked. + */ +template +typename std::enable_if< + std::is_integral::value && !std::is_same::value && + !std::is_same::value && + std::is_integral::value, + ConversionResult>::type +convertTo(const Src& value) { + /* static */ if ( + std::numeric_limits::max() < std::numeric_limits::max()) { + if (greater_than::max()>(value)) { + return ConversionResult(ConversionError::ARITH_POSITIVE_OVERFLOW); + } + } + /* static */ if ( + std::is_signed::value && + (!std::is_signed::value || sizeof(Src) > sizeof(Tgt))) { + if (less_than::min()>(value)) { + return ConversionResult(ConversionError::ARITH_NEGATIVE_OVERFLOW); + } + } + return ConversionResult(static_cast(value)); +} + +/** + * Checked conversion from floating to floating. The checks are only + * performed when meaningful, e.g. conversion from float to double goes + * unchecked. + */ +template +typename std::enable_if< + std::is_floating_point::value && std::is_floating_point::value && + !std::is_same::value, + ConversionResult>::type +convertTo(const Src& value) { + /* static */ if ( + std::numeric_limits::max() < std::numeric_limits::max()) { + if (value > std::numeric_limits::max()) { + return ConversionResult(ConversionError::ARITH_POSITIVE_OVERFLOW); + } + if (value < std::numeric_limits::lowest()) { + return ConversionResult(ConversionError::ARITH_NEGATIVE_OVERFLOW); + } + } + return ConversionResult(boost::implicit_cast(value)); +} + /** * Check if a floating point value can safely be converted to an * integer value without triggering undefined behaviour. @@ -1212,7 +1248,6 @@ constexpr typename std::enable_if< checkConversion(const Src&) { return true; } -} /** * Checked conversion from integral to floating point and back. The @@ -1221,30 +1256,51 @@ checkConversion(const Src&) { * complements existing routines nicely. For various rounding * routines, see . */ -template +template typename std::enable_if< (std::is_integral::value && std::is_floating_point::value) || - (std::is_floating_point::value && std::is_integral::value && - !std::is_same::value), - Tgt>::type -to(const Src& value) { - if (detail::checkConversion(value)) { - Tgt result = Tgt(value); - if (detail::checkConversion(result)) { - auto witness = static_cast(result); - if (value == witness) { - return result; + (std::is_floating_point::value && std::is_integral::value), + ConversionResult>::type +convertTo(const Src& value) { + if (LIKELY(checkConversion(value))) { + Tgt result = static_cast(value); + if (LIKELY(checkConversion(result))) { + Src witness = static_cast(result); + if (LIKELY(value == witness)) { + return ConversionResult(result); } } } - throw std::range_error( - to("to<>: loss of precision when converting ", value, + return ConversionResult(ConversionError::ARITH_LOSS_OF_PRECISION); +} + +template +inline std::string errorValue(const Src& value) { #ifdef FOLLY_HAS_RTTI - " to type ", typeid(Tgt).name() + return to("(", demangle(typeid(Tgt)), ") ", value); #else - " to other type" + return to(value); #endif - ).c_str()); +} + +template +using IsArithToArith = std::integral_constant< + bool, + !std::is_same::value && !std::is_same::value && + std::is_arithmetic::value && + std::is_arithmetic::value>; + +} // namespace detail + +template +typename std::enable_if< + detail::IsArithToArith< + typename detail::WrapperInfo::type, Src>::value, Tgt>::type +to(const Src& value) { + auto res = detail::convertTo::type>(value); + return detail::wrap(res, [&] { + return detail::errorValue::type>(value); + }); } /******************************************************************************* @@ -1319,14 +1375,3 @@ to(const Src & value) { } } // namespace folly - -// FOLLY_CONV_INTERNAL is defined by Conv.cpp. Keep the FOLLY_RANGE_CHECK -// macro for use in Conv.cpp, but #undefine it everywhere else we are included, -// to avoid defining this global macro name in other files that include Conv.h. -#ifndef FOLLY_CONV_INTERNAL -#undef FOLLY_RANGE_CHECK -#undef FOLLY_RANGE_CHECK_BEGIN_END -#undef FOLLY_RANGE_CHECK_STRINGPIECE -#undef FOLLY_RANGE_CHECK_STRINGIZE -#undef FOLLY_RANGE_CHECK_STRINGIZE2 -#endif diff --git a/folly/experimental/test/DynamicParserTest.cpp b/folly/experimental/test/DynamicParserTest.cpp index e7301c93..39bc5752 100644 --- a/folly/experimental/test/DynamicParserTest.cpp +++ b/folly/experimental/test/DynamicParserTest.cpp @@ -97,9 +97,9 @@ TEST(TestDynamicParser, OnErrorThrowError) { auto error = ex.error(); const auto& message = error.at("nested").at("0").at("nested").at("int").at("error"); - EXPECT_PCRE_MATCH(".* conversion to integral.*", message.getString()); + EXPECT_PCRE_MATCH(".*Invalid leading.*", message.getString()); EXPECT_PCRE_MATCH( - "DynamicParserParseError: .* conversion to integral.*", ex.what() + "DynamicParserParseError: .*Invalid leading.*", ex.what() ); EXPECT_EQ(dynamic(dynamic::object ("nested", dynamic::object @@ -282,7 +282,7 @@ TEST(TestDynamicParser, TestRequiredOptionalParseErrors) { }; EXPECT_EQ(dynamic(dynamic::object("nested", dynamic::object ("x", get_expected_error_fn("x", "TypeError: .* but had type `array'")) - ("y", get_expected_error_fn("y", ".* Invalid leading character .*")) + ("y", get_expected_error_fn("y", ".*Invalid leading character.*")) ("z", get_expected_error_fn("z", "CUSTOM")))), errors); } @@ -317,7 +317,7 @@ TEST(TestDynamicParser, TestItemParseErrors) { dynamic::array("this is not a bool"), [&](DynamicParser& p) { p.arrayItems([&](int64_t, bool) {}); }, 0, "0", - ".* Non-whitespace: .*" + ".*Non-whitespace.*" ); } diff --git a/folly/test/ConvBenchmark.cpp b/folly/test/ConvBenchmark.cpp index a6e21dd2..0c7ffd3d 100644 --- a/folly/test/ConvBenchmark.cpp +++ b/folly/test/ConvBenchmark.cpp @@ -28,6 +28,13 @@ using namespace std; using namespace folly; +// Android doesn't support std::to_string so just use a placeholder there. +#ifdef __ANDROID__ +#define FOLLY_RANGE_CHECK_TO_STRING(x) std::string("N/A") +#else +#define FOLLY_RANGE_CHECK_TO_STRING(x) std::to_string(x) +#endif + namespace folly { namespace conv_bench_detail { diff --git a/folly/test/ConvTest.cpp b/folly/test/ConvTest.cpp index 5512014c..42612818 100644 --- a/folly/test/ConvTest.cpp +++ b/folly/test/ConvTest.cpp @@ -14,12 +14,14 @@ * limitations under the License. */ +#include #include #include -#include #include #include +#include #include +#include using namespace std; using namespace folly; @@ -563,15 +565,32 @@ TEST(Conv, FBStringToString) { } TEST(Conv, StringPieceToDouble) { - string s = "2134123.125 zorro"; - StringPiece pc(s); - EXPECT_EQ(to(&pc), 2134123.125); - EXPECT_EQ(pc, " zorro"); - - EXPECT_THROW(to(StringPiece(s)), std::range_error); - EXPECT_EQ(to(StringPiece(s.data(), pc.data())), 2134123.125); + vector> strs{ + make_tuple("2134123.125 zorro", " zorro", 2134123.125), + make_tuple(" 2134123.125 zorro", " zorro", 2134123.125), + make_tuple(" 2134123.125 zorro", " zorro", 2134123.125), + make_tuple(" 2134123.125 zorro ", " zorro ", 2134123.125), + make_tuple("2134123.125zorro", "zorro", 2134123.125), + make_tuple("0 zorro", " zorro", 0.0), + make_tuple(" 0 zorro", " zorro", 0.0), + make_tuple(" 0 zorro", " zorro", 0.0), + make_tuple(" 0 zorro ", " zorro ", 0.0), + make_tuple("0zorro", "zorro", 0.0), + make_tuple("0.0 zorro", " zorro", 0.0), + make_tuple(" 0.0 zorro", " zorro", 0.0), + make_tuple(" 0.0 zorro", " zorro", 0.0), + make_tuple(" 0.0 zorro ", " zorro ", 0.0), + make_tuple("0.0zorro", "zorro", 0.0), + }; + for (const auto& s : strs) { + StringPiece pc(get<0>(s)); + EXPECT_EQ(get<2>(s), to(&pc)) << "\"" << get<0>(s) << "\""; + EXPECT_EQ(get<1>(s), pc); + EXPECT_THROW(to(StringPiece(get<0>(s))), std::range_error); + EXPECT_EQ(get<2>(s), to(StringPiece(get<0>(s), pc.data()))); + } -// Test NaN conversion + // Test NaN conversion try { to("not a number"); EXPECT_TRUE(false); @@ -849,6 +868,164 @@ TEST(Conv, FloatToBool) { EXPECT_EQ(to(-std::numeric_limits::infinity()), true); } +namespace { + +template +void testConvError( + F&& expr, + const char* exprStr, + ConversionError::Code code, + const char* value, + bool quotedValue, + int line) { + std::string where = to(__FILE__, "(", line, "): "); + try { + auto res = expr(); + EXPECT_TRUE(false) << where << exprStr << " -> " << res; + } catch (const ConversionError& e) { + EXPECT_EQ(code, e.errorCode()) << where << exprStr; + std::string str(e.what()); + EXPECT_FALSE(str.empty()) << where << exprStr << " -> " << str; + auto pos = str.find(':'); + if (value) { + std::ostringstream exp; + exp << str.substr(0, pos) + ": "; + if (quotedValue) { + exp << "\"" << value << "\""; + } else { + exp << value; + } + EXPECT_EQ(exp.str(), str) << where << exprStr << " -> " << str; + } else { + EXPECT_EQ(pos, std::string::npos) << where << exprStr << " -> " << str; + } + } +} +} + +#define EXPECT_CONV_ERROR_QUOTE(expr, code, value, quoted) \ + testConvError( \ + [&] { return expr; }, \ + #expr, \ + ConversionError::code, \ + value, \ + quoted, \ + __LINE__) + +#define EXPECT_CONV_ERROR(expr, code, value) \ + EXPECT_CONV_ERROR_QUOTE(expr, code, value, true) + +#define EXPECT_CONV_ERROR_STR(type, str, code) \ + EXPECT_CONV_ERROR(to(str), code, str) + +#define EXPECT_CONV_ERROR_STR_NOVAL(type, str, code) \ + EXPECT_CONV_ERROR(to(str), code, nullptr) + +TEST(Conv, ConversionErrorStrToBool) { + EXPECT_CONV_ERROR_STR_NOVAL(bool, StringPiece(), EMPTY_INPUT_STRING); + EXPECT_CONV_ERROR_STR_NOVAL(bool, "", EMPTY_INPUT_STRING); + EXPECT_CONV_ERROR_STR(bool, " ", EMPTY_INPUT_STRING); + EXPECT_CONV_ERROR_STR(bool, " 11 ", BOOL_OVERFLOW); + EXPECT_CONV_ERROR_STR(bool, "other ", BOOL_INVALID_VALUE); + EXPECT_CONV_ERROR_STR(bool, " bla", BOOL_INVALID_VALUE); + EXPECT_CONV_ERROR(to(" offbla"), NON_WHITESPACE_AFTER_END, "bla"); +} + +TEST(Conv, ConversionErrorStrToFloat) { + EXPECT_CONV_ERROR_STR_NOVAL(float, StringPiece(), EMPTY_INPUT_STRING); + EXPECT_CONV_ERROR_STR_NOVAL(float, "", EMPTY_INPUT_STRING); + EXPECT_CONV_ERROR_STR(float, " ", EMPTY_INPUT_STRING); + EXPECT_CONV_ERROR_STR(float, " junk", STRING_TO_FLOAT_ERROR); + EXPECT_CONV_ERROR(to(" 1bla"), NON_WHITESPACE_AFTER_END, "bla"); +} + +TEST(Conv, ConversionErrorStrToInt) { + // empty string handling + EXPECT_CONV_ERROR_STR_NOVAL(int, StringPiece(), EMPTY_INPUT_STRING); + EXPECT_CONV_ERROR_STR_NOVAL(int, "", EMPTY_INPUT_STRING); + EXPECT_CONV_ERROR_STR(int, " ", EMPTY_INPUT_STRING); + + // signed integers + EXPECT_CONV_ERROR_STR(int, " *", INVALID_LEADING_CHAR); + EXPECT_CONV_ERROR_STR(int, " +", NO_DIGITS); + EXPECT_CONV_ERROR_STR(int, " +*", NON_DIGIT_CHAR); + EXPECT_CONV_ERROR_STR(int8_t, " 128", POSITIVE_OVERFLOW); + EXPECT_CONV_ERROR_STR(int8_t, " -129", NEGATIVE_OVERFLOW); + EXPECT_CONV_ERROR_STR(int8_t, " 1000", POSITIVE_OVERFLOW); + EXPECT_CONV_ERROR_STR(int8_t, "-1000", NEGATIVE_OVERFLOW); + EXPECT_CONV_ERROR(to(" -13bla"), NON_WHITESPACE_AFTER_END, "bla"); + + // unsigned integers + EXPECT_CONV_ERROR_STR(unsigned, " -", NON_DIGIT_CHAR); + EXPECT_CONV_ERROR_STR(uint8_t, " 256", POSITIVE_OVERFLOW); + EXPECT_CONV_ERROR(to("42bla"), NON_WHITESPACE_AFTER_END, "bla"); +} + +#define EXPECT_CONV_ERROR_PP_VAL(type, str, code, val) \ + do { \ + StringPiece input(str); \ + EXPECT_CONV_ERROR(to(input.begin(), input.end()), code, val); \ + } while (0) + +#define EXPECT_CONV_ERROR_PP(type, str, code) \ + EXPECT_CONV_ERROR_PP_VAL(type, str, code, str) + +TEST(Conv, ConversionErrorPtrPairToInt) { + // signed integers + EXPECT_CONV_ERROR_PP(int, "", INVALID_LEADING_CHAR); + EXPECT_CONV_ERROR_PP(int, " ", INVALID_LEADING_CHAR); + EXPECT_CONV_ERROR_PP(int, "*", INVALID_LEADING_CHAR); + EXPECT_CONV_ERROR_PP(int, "+", NO_DIGITS); + EXPECT_CONV_ERROR_PP(int8_t, "128", POSITIVE_OVERFLOW); + EXPECT_CONV_ERROR_PP(int8_t, "-129", NEGATIVE_OVERFLOW); + EXPECT_CONV_ERROR_PP(int8_t, "1000", POSITIVE_OVERFLOW); + EXPECT_CONV_ERROR_PP(int8_t, "-1000", NEGATIVE_OVERFLOW); + EXPECT_CONV_ERROR_PP(int, "-junk", NON_DIGIT_CHAR); + + // unsigned integers + EXPECT_CONV_ERROR_PP(unsigned, "", NO_DIGITS); + EXPECT_CONV_ERROR_PP(uint8_t, "256", POSITIVE_OVERFLOW); + EXPECT_CONV_ERROR_PP(unsigned, "junk", NON_DIGIT_CHAR); +} + +namespace { + +template +std::string prefixWithType(V value) { + std::ostringstream oss; +#ifdef FOLLY_HAS_RTTI + oss << "(" << demangle(typeid(T)) << ") "; +#endif + oss << to(value); + return oss.str(); +} +} + +#define EXPECT_CONV_ERROR_ARITH(type, val, code) \ + EXPECT_CONV_ERROR_QUOTE( \ + to(val), code, prefixWithType(val).c_str(), false) + +TEST(Conv, ConversionErrorIntToInt) { + EXPECT_CONV_ERROR_ARITH(signed char, 128, ARITH_POSITIVE_OVERFLOW); + EXPECT_CONV_ERROR_ARITH(unsigned char, -1, ARITH_NEGATIVE_OVERFLOW); +} + +TEST(Conv, ConversionErrorFloatToFloat) { + EXPECT_CONV_ERROR_ARITH( + float, std::numeric_limits::max(), ARITH_POSITIVE_OVERFLOW); + EXPECT_CONV_ERROR_ARITH( + float, std::numeric_limits::lowest(), ARITH_NEGATIVE_OVERFLOW); +} + +TEST(Conv, ConversionErrorIntToFloat) { + EXPECT_CONV_ERROR_ARITH( + float, std::numeric_limits::max(), ARITH_LOSS_OF_PRECISION); +} + +TEST(Conv, ConversionErrorFloatToInt) { + EXPECT_CONV_ERROR_ARITH(int8_t, 65.5, ARITH_LOSS_OF_PRECISION); +} + TEST(Conv, NewUint64ToString) { char buf[21]; diff --git a/folly/test/FormatTest.cpp b/folly/test/FormatTest.cpp index 61887a42..d1d8c96a 100644 --- a/folly/test/FormatTest.cpp +++ b/folly/test/FormatTest.cpp @@ -452,8 +452,7 @@ TEST(Format, BogusFormatString) { // This one fails in detail::enforceWhitespace(), which throws // std::range_error - EXPECT_THROW_STR(sformat("{0[test}"), std::range_error, - "Non-whitespace: ["); + EXPECT_THROW_STR(sformat("{0[test}"), std::range_error, "Non-whitespace"); } template diff --git a/folly/test/StringTest.cpp b/folly/test/StringTest.cpp index 781aacdb..76cf6ac4 100644 --- a/folly/test/StringTest.cpp +++ b/folly/test/StringTest.cpp @@ -413,8 +413,8 @@ TEST(PrettyToDouble, Basic) { double recoveredX = 0; try{ recoveredX = prettyToDouble(testString, formatType); - } catch (std::range_error &ex){ - EXPECT_TRUE(false); + } catch (const std::range_error& ex) { + EXPECT_TRUE(false) << testCase.prettyString << " -> " << ex.what(); } double relativeError = fabs(x) < 1e-5 ? (x-recoveredX) : (x - recoveredX) / x;