X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FSupport%2FStringRef.cpp;h=7ecff2964c517f25274740f12adc8a9b7b114e18;hb=4b85564ba4a41465155b9128a68e5e14fea78365;hp=a862ed2fa9ce6589cf987b972f730583072f77a6;hpb=6e6a558ebce556476d8fd659b419a2760f2ab154;p=oota-llvm.git diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index a862ed2fa9c..7ecff2964c5 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -9,7 +9,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/edit_distance.h" #include using namespace llvm; @@ -25,27 +26,52 @@ static char ascii_tolower(char x) { return x; } +static char ascii_toupper(char x) { + if (x >= 'a' && x <= 'z') + return x - 'a' + 'A'; + return x; +} + static bool ascii_isdigit(char x) { return x >= '0' && x <= '9'; } -/// compare_lower - Compare strings, ignoring case. -int StringRef::compare_lower(StringRef RHS) const { - for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { - unsigned char LHC = ascii_tolower(Data[I]); - unsigned char RHC = ascii_tolower(RHS.Data[I]); +// strncasecmp() is not available on non-POSIX systems, so define an +// alternative function here. +static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) { + for (size_t I = 0; I < Length; ++I) { + unsigned char LHC = ascii_tolower(LHS[I]); + unsigned char RHC = ascii_tolower(RHS[I]); if (LHC != RHC) return LHC < RHC ? -1 : 1; } + return 0; +} +/// compare_lower - Compare strings, ignoring case. +int StringRef::compare_lower(StringRef RHS) const { + if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length))) + return Res; if (Length == RHS.Length) return 0; return Length < RHS.Length ? -1 : 1; } +/// Check if this string starts with the given \p Prefix, ignoring case. +bool StringRef::startswith_lower(StringRef Prefix) const { + return Length >= Prefix.Length && + ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0; +} + +/// Check if this string ends with the given \p Suffix, ignoring case. +bool StringRef::endswith_lower(StringRef Suffix) const { + return Length >= Suffix.Length && + ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; +} + /// compare_numeric - Compare strings, handle embedded numbers. int StringRef::compare_numeric(StringRef RHS) const { - for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { + for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) { // Check for sequences of digits. if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) { // The longer sequence of numbers is considered larger. @@ -77,57 +103,30 @@ int StringRef::compare_numeric(StringRef RHS) const { // Compute the edit distance between the two given strings. unsigned StringRef::edit_distance(llvm::StringRef Other, bool AllowReplacements, - unsigned MaxEditDistance) { - // The algorithm implemented below is the "classic" - // dynamic-programming algorithm for computing the Levenshtein - // distance, which is described here: - // - // http://en.wikipedia.org/wiki/Levenshtein_distance - // - // Although the algorithm is typically described using an m x n - // array, only two rows are used at a time, so this implemenation - // just keeps two separate vectors for those two rows. - size_type m = size(); - size_type n = Other.size(); - - const unsigned SmallBufferSize = 64; - unsigned SmallBuffer[SmallBufferSize]; - llvm::OwningArrayPtr Allocated; - unsigned *previous = SmallBuffer; - if (2*(n + 1) > SmallBufferSize) { - previous = new unsigned [2*(n+1)]; - Allocated.reset(previous); - } - unsigned *current = previous + (n + 1); - - for (unsigned i = 0; i <= n; ++i) - previous[i] = i; - - for (size_type y = 1; y <= m; ++y) { - current[0] = y; - unsigned BestThisRow = current[0]; - - for (size_type x = 1; x <= n; ++x) { - if (AllowReplacements) { - current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u), - min(current[x-1], previous[x])+1); - } - else { - if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1]; - else current[x] = min(current[x-1], previous[x]) + 1; - } - BestThisRow = min(BestThisRow, current[x]); - } + unsigned MaxEditDistance) const { + return llvm::ComputeEditDistance( + makeArrayRef(data(), size()), + makeArrayRef(Other.data(), Other.size()), + AllowReplacements, MaxEditDistance); +} - if (MaxEditDistance && BestThisRow > MaxEditDistance) - return MaxEditDistance + 1; +//===----------------------------------------------------------------------===// +// String Operations +//===----------------------------------------------------------------------===// - unsigned *tmp = current; - current = previous; - previous = tmp; +std::string StringRef::lower() const { + std::string Result(size(), char()); + for (size_type i = 0, e = size(); i != e; ++i) { + Result[i] = ascii_tolower(Data[i]); } + return Result; +} - unsigned Result = previous[n]; +std::string StringRef::upper() const { + std::string Result(size(), char()); + for (size_type i = 0, e = size(); i != e; ++i) { + Result[i] = ascii_toupper(Data[i]); + } return Result; } @@ -141,15 +140,28 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, /// \return - The index of the first occurrence of \arg Str, or npos if not /// found. size_t StringRef::find(StringRef Str, size_t From) const { + if (From > Length) + return npos; + + const char *Needle = Str.data(); size_t N = Str.size(); - if (N > Length) + if (N == 0) + return From; + + size_t Size = Length - From; + if (Size < N) return npos; + const char *Start = Data + From; + const char *Stop = Start + (Size - N + 1); + // For short haystacks or unsupported needles fall back to the naive algorithm - if (Length < 16 || N > 255 || N == 0) { - for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i) - if (substr(i, N).equals(Str)) - return i; + if (Size < 16 || N > 255) { + do { + if (std::memcmp(Start, Needle, N) == 0) + return Start - Data; + ++Start; + } while (Start < Stop); return npos; } @@ -159,16 +171,13 @@ size_t StringRef::find(StringRef Str, size_t From) const { for (unsigned i = 0; i != N-1; ++i) BadCharSkip[(uint8_t)Str[i]] = N-1-i; - unsigned Len = Length, Pos = min(From, Length); - while (Len >= N) { - if (substr(Pos, N).equals(Str)) // See if this is the correct substring. - return Pos; + do { + if (std::memcmp(Start, Needle, N) == 0) + return Start - Data; // Otherwise skip the appropriate number of bytes. - uint8_t Skip = BadCharSkip[(uint8_t)Data[Pos+N-1]]; - Len -= Skip; - Pos += Skip; - } + Start += BadCharSkip[(uint8_t)Start[N-1]]; + } while (Start < Stop); return npos; } @@ -199,7 +208,7 @@ StringRef::size_type StringRef::find_first_of(StringRef Chars, for (size_type i = 0; i != Chars.size(); ++i) CharBits.set((unsigned char)Chars[i]); - for (size_type i = min(From, Length), e = Length; i != e; ++i) + for (size_type i = std::min(From, Length), e = Length; i != e; ++i) if (CharBits.test((unsigned char)Data[i])) return i; return npos; @@ -208,7 +217,7 @@ StringRef::size_type StringRef::find_first_of(StringRef Chars, /// find_first_not_of - Find the first character in the string that is not /// \arg C or npos if not found. StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { - for (size_type i = min(From, Length), e = Length; i != e; ++i) + for (size_type i = std::min(From, Length), e = Length; i != e; ++i) if (Data[i] != C) return i; return npos; @@ -224,7 +233,7 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars, for (size_type i = 0; i != Chars.size(); ++i) CharBits.set((unsigned char)Chars[i]); - for (size_type i = min(From, Length), e = Length; i != e; ++i) + for (size_type i = std::min(From, Length), e = Length; i != e; ++i) if (!CharBits.test((unsigned char)Data[i])) return i; return npos; @@ -240,12 +249,90 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars, for (size_type i = 0; i != Chars.size(); ++i) CharBits.set((unsigned char)Chars[i]); - for (size_type i = min(From, Length) - 1, e = -1; i != e; --i) + for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) if (CharBits.test((unsigned char)Data[i])) return i; return npos; } +/// find_last_not_of - Find the last character in the string that is not +/// \arg C, or npos if not found. +StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const { + for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) + if (Data[i] != C) + return i; + return npos; +} + +/// find_last_not_of - Find the last character in the string that is not in +/// \arg Chars, or npos if not found. +/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_last_not_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0, e = Chars.size(); i != e; ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) + if (!CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + +void StringRef::split(SmallVectorImpl &A, + StringRef Separator, int MaxSplit, + bool KeepEmpty) const { + StringRef S = *this; + + // Count down from MaxSplit. When MaxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make MaxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (MaxSplit-- != 0) { + size_t Idx = S.find(Separator); + if (Idx == npos) + break; + + // Push this split. + if (KeepEmpty || Idx > 0) + A.push_back(S.slice(0, Idx)); + + // Jump forward. + S = S.slice(Idx + Separator.size(), npos); + } + + // Push the tail. + if (KeepEmpty || !S.empty()) + A.push_back(S); +} + +void StringRef::split(SmallVectorImpl &A, char Separator, + int MaxSplit, bool KeepEmpty) const { + StringRef S = *this; + + // Count down from MaxSplit. When MaxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make MaxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (MaxSplit-- != 0) { + size_t Idx = S.find(Separator); + if (Idx == npos) + break; + + // Push this split. + if (KeepEmpty || Idx > 0) + A.push_back(S.slice(0, Idx)); + + // Jump forward. + S = S.slice(Idx + 1, npos); + } + + // Push the tail. + if (KeepEmpty || !S.empty()) + A.push_back(S); +} + //===----------------------------------------------------------------------===// // Helpful Algorithms //===----------------------------------------------------------------------===// @@ -267,21 +354,29 @@ static unsigned GetAutoSenseRadix(StringRef &Str) { if (Str.startswith("0x")) { Str = Str.substr(2); return 16; - } else if (Str.startswith("0b")) { + } + + if (Str.startswith("0b")) { Str = Str.substr(2); return 2; - } else if (Str.startswith("0")) { + } + + if (Str.startswith("0o")) { + Str = Str.substr(2); return 8; - } else { - return 10; } + + if (Str.startswith("0")) + return 8; + + return 10; } /// GetAsUnsignedInteger - Workhorse method that converts a integer character /// sequence of radix up to 36 to an unsigned long long value. -static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, - unsigned long long &Result) { +bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix, + unsigned long long &Result) { // Autosense radix if not specified. if (Radix == 0) Radix = GetAutoSenseRadix(Str); @@ -311,8 +406,8 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, unsigned long long PrevResult = Result; Result = Result*Radix+CharVal; - // Check for overflow. - if (Result < PrevResult) + // Check for overflow by shifting back and seeing if bits were lost. + if (Result/Radix < PrevResult) return true; Str = Str.substr(1); @@ -321,17 +416,13 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, return false; } -bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const { - return GetAsUnsignedInteger(*this, Radix, Result); -} - - -bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { +bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, + long long &Result) { unsigned long long ULLVal; // Handle positive strings first. - if (empty() || front() != '-') { - if (GetAsUnsignedInteger(*this, Radix, ULLVal) || + if (Str.empty() || Str.front() != '-') { + if (getAsUnsignedInteger(Str, Radix, ULLVal) || // Check for value so large it overflows a signed value. (long long)ULLVal < 0) return true; @@ -340,7 +431,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { } // Get the positive part of the value. - if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) || + if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) || // Reject values so large they'd overflow as negative signed, but allow // "-0". This negates the unsigned so that the negative isn't undefined // on signed overflow. @@ -351,24 +442,6 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { return false; } -bool StringRef::getAsInteger(unsigned Radix, int &Result) const { - long long Val; - if (getAsInteger(Radix, Val) || - (int)Val != Val) - return true; - Result = Val; - return false; -} - -bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { - unsigned long long Val; - if (getAsInteger(Radix, Val) || - (unsigned)Val != Val) - return true; - Result = Val; - return false; -} - bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { StringRef Str = *this; @@ -400,7 +473,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { unsigned BitWidth = Log2Radix * Str.size(); if (BitWidth < Result.getBitWidth()) BitWidth = Result.getBitWidth(); // don't shrink the result - else + else if (BitWidth > Result.getBitWidth()) Result = Result.zext(BitWidth); APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix @@ -443,3 +516,9 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { return false; } + + +// Implementation of StringRef hashing. +hash_code llvm::hash_value(StringRef S) { + return hash_combine_range(S.begin(), S.end()); +}