X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FString.h;h=750863b986bbe52b4812aaa0ed8f84cc89052779;hp=604c0883ff4a31359b753747a54ecd2521a02056;hb=fe9d8cadb9bba9ddc7ea151f779458b8dc1904cd;hpb=2afd1f1acb7b864fc0e5f2791c5250c1fbc82396 diff --git a/folly/String.h b/folly/String.h index 604c0883..750863b9 100644 --- a/folly/String.h +++ b/folly/String.h @@ -1,5 +1,5 @@ /* - * Copyright 2012 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,22 +14,26 @@ * limitations under the License. */ -#ifndef FOLLY_BASE_STRING_H_ -#define FOLLY_BASE_STRING_H_ +#pragma once +#define FOLLY_STRING_H_ +#include +#include #include -#include +#include +#include +#include -#ifdef __GNUC__ -# include -# include -#endif +#include +#include -#include "folly/Conv.h" -#include "folly/FBString.h" -#include "folly/FBVector.h" -#include "folly/Range.h" -#include "folly/ScopeGuard.h" +#include +#include +#include +#include +#include +#include +#include // Compatibility function, to make sure toStdString(s) can be called // to convert a std::string or fbstring variable s into type std::string @@ -111,22 +115,82 @@ String cUnescape(StringPiece str, bool strict = true) { return out; } +/** + * URI-escape a string. Appends the result to the output string. + * + * Alphanumeric characters and other characters marked as "unreserved" in RFC + * 3986 ( -_.~ ) are left unchanged. In PATH mode, the forward slash (/) is + * also left unchanged. In QUERY mode, spaces are replaced by '+'. All other + * characters are percent-encoded. + */ +enum class UriEscapeMode : unsigned char { + // The values are meaningful, see generate_escape_tables.py + ALL = 0, + QUERY = 1, + PATH = 2 +}; +template +void uriEscape(StringPiece str, + String& out, + UriEscapeMode mode = UriEscapeMode::ALL); + +/** + * Similar to uriEscape above, but returns the escaped string. + */ +template +String uriEscape(StringPiece str, UriEscapeMode mode = UriEscapeMode::ALL) { + String out; + uriEscape(str, out, mode); + return out; +} + +/** + * URI-unescape a string. Appends the result to the output string. + * + * In QUERY mode, '+' are replaced by space. %XX sequences are decoded if + * XX is a valid hex sequence, otherwise we throw invalid_argument. + */ +template +void uriUnescape(StringPiece str, + String& out, + UriEscapeMode mode = UriEscapeMode::ALL); + +/** + * Similar to uriUnescape above, but returns the unescaped string. + */ +template +String uriUnescape(StringPiece str, UriEscapeMode mode = UriEscapeMode::ALL) { + String out; + uriUnescape(str, out, mode); + return out; +} + /** * stringPrintf is much like printf but deposits its result into a * string. Two signatures are supported: the first simply returns the * resulting string, and the second appends the produced characters to * the specified string and returns a reference to it. */ -std::string stringPrintf(const char* format, ...) - __attribute__ ((format (printf, 1, 2))); +std::string stringPrintf(FOLLY_PRINTF_FORMAT const char* format, ...) + FOLLY_PRINTF_FORMAT_ATTR(1, 2); + +/* Similar to stringPrintf, with different signature. */ +void stringPrintf(std::string* out, FOLLY_PRINTF_FORMAT const char* fmt, ...) + FOLLY_PRINTF_FORMAT_ATTR(2, 3); -/** Similar to stringPrintf, with different signiture. - */ -void stringPrintf(std::string* out, const char* fmt, ...) - __attribute__ ((format (printf, 2, 3))); +std::string& stringAppendf(std::string* output, + FOLLY_PRINTF_FORMAT const char* format, ...) + FOLLY_PRINTF_FORMAT_ATTR(2, 3); -std::string& stringAppendf(std::string* output, const char* format, ...) - __attribute__ ((format (printf, 2, 3))); +/** + * Similar to stringPrintf, but accepts a va_list argument. + * + * As with vsnprintf() itself, the value of ap is undefined after the call. + * These functions do not call va_end() on ap. + */ +std::string stringVPrintf(const char* format, va_list ap); +void stringVPrintf(std::string* out, const char* format, va_list ap); +std::string& stringVAppendf(std::string* out, const char* format, va_list ap); /** * Backslashify a string, that is, replace non-printable characters @@ -147,12 +211,15 @@ std::string& stringAppendf(std::string* output, const char* format, ...) * C++, use cEscape instead. This function is for display purposes * only. */ -template -void backslashify(const String1& input, String2& output, bool hex_style=false); - -template -String backslashify(const String& input, bool hex_style=false) { - String output; +template +void backslashify( + folly::StringPiece input, + OutputString& output, + bool hex_style = false); + +template +OutputString backslashify(StringPiece input, bool hex_style = false) { + OutputString output; backslashify(input, output, hex_style); return output; } @@ -185,17 +252,43 @@ String humanify(const String& input) { * If append_output is true, append data to the output rather than * replace it. */ -template +template bool hexlify(const InputString& input, OutputString& output, bool append=false); +template +OutputString hexlify(ByteRange input) { + OutputString output; + if (!hexlify(input, output)) { + // hexlify() currently always returns true, so this can't really happen + throw std::runtime_error("hexlify failed"); + } + return output; +} + +template +OutputString hexlify(StringPiece input) { + return hexlify(ByteRange{input}); +} + /** * Same functionality as Python's binascii.unhexlify. Returns true * on successful conversion. */ -template +template bool unhexlify(const InputString& input, OutputString& output); +template +OutputString unhexlify(StringPiece input) { + OutputString output; + if (!unhexlify(input, output)) { + // unhexlify() fails if the input has non-hexidecimal characters, + // or if it doesn't consist of a whole number of bytes + throw std::domain_error("unhexlify() called with non-hex input"); + } + return output; +} + /* * A pretty-printer for numbers that appends suffixes of units of the * given type. It prints 4 sig-figs of value with the most @@ -212,7 +305,8 @@ bool unhexlify(const InputString& input, OutputString& output); * PRETTY_UNITS_METRIC - k, M, G, etc (goes up by 10^3 = 1000 each time) * PRETTY_UNITS_BINARY - k, M, G, etc (goes up by 2^10 = 1024 each time) * PRETTY_UNITS_BINARY_IEC - Ki, Mi, Gi, etc - * + * PRETTY_SI - full SI metric prefixes from yocto to Yotta + * http://en.wikipedia.org/wiki/Metric_prefix * @author Mark Rabkin */ enum PrettyType { @@ -228,11 +322,38 @@ enum PrettyType { PRETTY_UNITS_BINARY, PRETTY_UNITS_BINARY_IEC, - PRETTY_NUM_TYPES + PRETTY_SI, + PRETTY_NUM_TYPES, }; std::string prettyPrint(double val, PrettyType, bool addSpace = true); +/** + * This utility converts StringPiece in pretty format (look above) to double, + * with progress information. Alters the StringPiece parameter + * to get rid of the already-parsed characters. + * Expects string in form {space}* [] + * If string is not in correct format, utility finds longest valid prefix and + * if there at least one, returns double value based on that prefix and + * modifies string to what is left after parsing. Throws and std::range_error + * exception if there is no correct parse. + * Examples(for PRETTY_UNITS_METRIC): + * '10M' => 10 000 000 + * '10 M' => 10 000 000 + * '10' => 10 + * '10 Mx' => 10 000 000, prettyString == "x" + * 'abc' => throws std::range_error + */ +double prettyToDouble(folly::StringPiece *const prettyString, + const PrettyType type); + +/* + * Same as prettyToDouble(folly::StringPiece*, PrettyType), but + * expects whole string to be correctly parseable. Throws std::range_error + * otherwise + */ +double prettyToDouble(folly::StringPiece prettyString, const PrettyType type); + /** * Write a hex dump of size bytes starting at ptr to out. * @@ -262,29 +383,6 @@ std::string hexDump(const void* ptr, size_t size); */ fbstring errnoStr(int err); -/** - * Return the demangled (prettyfied) version of a C++ type. - * - * This function tries to produce a human-readable type, but the type name will - * be returned unchanged in case of error or if demangling isn't supported on - * your system. - * - * Use for debugging -- do not rely on demangle() returning anything useful. - * - * This function may allocate memory (and therefore throw). - */ -fbstring demangle(const char* name); -inline fbstring demangle(const std::type_info& type) { - return demangle(type.name()); -} - -/** - * Debug string for an exception: include type and what(). - */ -inline fbstring exceptionStr(const std::exception& e) { - return folly::to(demangle(typeid(e)), ": ", e.what()); -} - /* * Split a string into a list of tokens by delimiter. * @@ -314,56 +412,229 @@ inline fbstring exceptionStr(const std::exception& e) { * or not (generating empty tokens). */ -template +template void split(const Delim& delimiter, const String& input, std::vector& out, - bool ignoreEmpty = false); + const bool ignoreEmpty = false); -template +template void split(const Delim& delimiter, const String& input, folly::fbvector& out, - bool ignoreEmpty = false); + const bool ignoreEmpty = false); -template +template < + class OutputValueType, + class Delim, + class String, + class OutputIterator> void splitTo(const Delim& delimiter, const String& input, OutputIterator out, - bool ignoreEmpty = false); + const bool ignoreEmpty = false); -} // namespace folly +/* + * Split a string into a fixed number of string pieces and/or numeric types + * by delimiter. Conversions are supported for any type which folly:to<> can + * target, including all overloads of parseTo(). Returns 'true' if the fields + * were all successfully populated. Returns 'false' if there were too few + * fields in the input, or too many fields if exact=true. Casting exceptions + * will not be caught. + * + * Examples: + * + * folly::StringPiece name, key, value; + * if (folly::split('\t', line, name, key, value)) + * ... + * + * folly::StringPiece name; + * double value; + * int id; + * if (folly::split('\t', line, name, value, id)) + * ... + * + * The 'exact' template parameter specifies how the function behaves when too + * many fields are present in the input string. When 'exact' is set to its + * default value of 'true', a call to split will fail if the number of fields in + * the input string does not exactly match the number of output parameters + * passed. If 'exact' is overridden to 'false', all remaining fields will be + * stored, unsplit, in the last field, as shown below: + * + * folly::StringPiece x, y. + * if (folly::split(':', "a:b:c", x, y)) + * assert(x == "a" && y == "b:c"); + * + * Note that this will likely not work if the last field's target is of numeric + * type, in which case folly::to<> will throw an exception. + */ +template +struct IsConvertible { + enum { value = false }; +}; + +template +struct IsConvertible< + T, + decltype(static_cast( + parseTo(std::declval(), std::declval())))> { + enum { value = true }; +}; -// Hash functions for string and fbstring usable with e.g. hash_map -#ifdef __GNUC__ -namespace __gnu_cxx { +template +struct AllConvertible; -template -struct hash > : private hash { - size_t operator()(const folly::basic_fbstring & s) const { - return hash::operator()(s.c_str()); - } +template +struct AllConvertible { + enum { value = IsConvertible::value && AllConvertible::value }; }; -template -struct hash > : private hash { - size_t operator()(const std::basic_string & s) const { - return hash::operator()(s.c_str()); - } +template <> +struct AllConvertible<> { + enum { value = true }; }; -} // namespace __gnu_cxx -#endif +static_assert(AllConvertible::value, ""); +static_assert(AllConvertible::value, ""); +static_assert(AllConvertible::value, ""); +static_assert(AllConvertible::value, ""); +static_assert(!AllConvertible>::value, ""); -// Hook into boost's type traits -namespace boost { -template -struct has_nothrow_constructor > : true_type { - enum { value = true }; +template +typename std::enable_if< + AllConvertible::value && sizeof...(OutputTypes) >= 1, + bool>::type +split(const Delim& delimiter, StringPiece input, OutputTypes&... outputs); + +/* + * Join list of tokens. + * + * Stores a string representation of tokens in the same order with + * deliminer between each element. + */ + +template +void join(const Delim& delimiter, + Iterator begin, + Iterator end, + String& output); + +template +void join(const Delim& delimiter, + const Container& container, + String& output) { + join(delimiter, container.begin(), container.end(), output); +} + +template +void join(const Delim& delimiter, + const std::initializer_list& values, + String& output) { + join(delimiter, values.begin(), values.end(), output); +} + +template +std::string join(const Delim& delimiter, + const Container& container) { + std::string output; + join(delimiter, container.begin(), container.end(), output); + return output; +} + +template +std::string join(const Delim& delimiter, + const std::initializer_list& values) { + std::string output; + join(delimiter, values.begin(), values.end(), output); + return output; +} + +template < + class Delim, + class Iterator, + typename std::enable_if::iterator_category, + std::random_access_iterator_tag>::value>::type* = nullptr> +std::string join(const Delim& delimiter, Iterator begin, Iterator end) { + std::string output; + join(delimiter, begin, end, output); + return output; +} + +/** + * Returns a subpiece with all whitespace removed from the front of @sp. + * Whitespace means any of [' ', '\n', '\r', '\t']. + */ +StringPiece ltrimWhitespace(StringPiece sp); + +/** + * Returns a subpiece with all whitespace removed from the back of @sp. + * Whitespace means any of [' ', '\n', '\r', '\t']. + */ +StringPiece rtrimWhitespace(StringPiece sp); + +/** + * Returns a subpiece with all whitespace removed from the back and front of @sp. + * Whitespace means any of [' ', '\n', '\r', '\t']. + */ +inline StringPiece trimWhitespace(StringPiece sp) { + return ltrimWhitespace(rtrimWhitespace(sp)); +} + +/** + * Returns a subpiece with all whitespace removed from the front of @sp. + * Whitespace means any of [' ', '\n', '\r', '\t']. + * DEPRECATED: @see ltrimWhitespace @see rtrimWhitespace + */ +inline StringPiece skipWhitespace(StringPiece sp) { + return ltrimWhitespace(sp); +} + +/** + * Strips the leading and the trailing whitespace-only lines. Then looks for + * the least indented non-whitespace-only line and removes its amount of + * leading whitespace from every line. Assumes leading whitespace is either all + * spaces or all tabs. + * + * Purpose: including a multiline string literal in source code, indented to + * the level expected from context. + */ +std::string stripLeftMargin(std::string s); + +/** + * Fast, in-place lowercasing of ASCII alphabetic characters in strings. + * Leaves all other characters unchanged, including those with the 0x80 + * bit set. + * @param str String to convert + * @param length Length of str, in bytes + */ +void toLowerAscii(char* str, size_t length); + +inline void toLowerAscii(MutableStringPiece str) { + toLowerAscii(str.begin(), str.size()); +} + +inline void toLowerAscii(std::string& str) { + // str[0] is legal also if the string is empty. + toLowerAscii(&str[0], str.size()); +} + +template < + class Iterator = const char*, + class Base = folly::Range>> +class UTF8Range : public Base { + public: + /* implicit */ UTF8Range(const folly::Range baseRange) + : Base(boost::u8_to_u32_iterator( + baseRange.begin(), baseRange.begin(), baseRange.end()), + boost::u8_to_u32_iterator( + baseRange.end(), baseRange.begin(), baseRange.end())) {} + /* implicit */ UTF8Range(const std::string& baseString) + : Base(folly::Range(baseString)) {} }; -} // namespace boost -#include "folly/String-inl.h" +using UTF8StringPiece = UTF8Range; + +} // namespace folly -#endif +#include