From: Marcin Pawlowski Date: Tue, 15 Jul 2014 22:20:29 +0000 (-0700) Subject: added replaceAt and replaceAll to MutableStringPiece X-Git-Tag: v0.22.0~450 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=28a34a4e25f909d14639134701baaebc48524b0c;p=folly.git added replaceAt and replaceAll to MutableStringPiece Summary: To make MutableStringPiece more usable (and make the implementation of replace* methods easier) I made sure that all the const methods (like find) work for MutableStringPiece in a fashion similar to how they work with StringPiece (work with everything auto convertible to StringPiece). As a result this change is pretty susbstatial in LOC, even though now much happens here. Test Plan: unit tests Reviewed By: marcelo.juchem@fb.com FB internal diff: D1420965 Tasks: 4632424 --- diff --git a/folly/Range.h b/folly/Range.h index 1bb0cb34..f5569865 100644 --- a/folly/Range.h +++ b/folly/Range.h @@ -144,6 +144,19 @@ public: typename std::iterator_traits::reference>::type value_type; typedef typename std::iterator_traits::reference reference; + + /** + * For MutableStringPiece and MutableByteRange we define StringPiece + * and ByteRange as const_range_type (for everything else its just + * identity). We do that to enable operations such as find with + * args which are const. + */ + typedef typename std::conditional< + std::is_same::value + || std::is_same::value, + Range, + Range>::type const_range_type; + typedef std::char_traits::type> traits_type; @@ -352,8 +365,12 @@ public: fbstring fbstr() const { return fbstring(b_, size()); } fbstring toFbstring() const { return fbstr(); } - // Works only for Range - int compare(const Range& o) const { + const_range_type castToConst() const { + return const_range_type(*this); + }; + + // Works only for Range (and Range) + int compare(const const_range_type& o) const { const size_type tsize = this->size(); const size_type osize = o.size(); const size_type msize = std::min(tsize, osize); @@ -427,70 +444,72 @@ public: } // string work-alike functions - size_type find(Range str) const { - return qfind(*this, str); + size_type find(const_range_type str) const { + return qfind(castToConst(), str); } - size_type find(Range str, size_t pos) const { + size_type find(const_range_type str, size_t pos) const { if (pos > size()) return std::string::npos; - size_t ret = qfind(subpiece(pos), str); + size_t ret = qfind(castToConst().subpiece(pos), str); return ret == npos ? ret : ret + pos; } size_type find(Iter s, size_t pos, size_t n) const { if (pos > size()) return std::string::npos; - size_t ret = qfind(pos ? subpiece(pos) : *this, Range(s, n)); + auto forFinding = castToConst(); + size_t ret = qfind( + pos ? forFinding.subpiece(pos) : forFinding, const_range_type(s, n)); return ret == npos ? ret : ret + pos; } - // Works only for Range which have Range(Iter) ctor + // Works only for Range<(const) (unsigned) char*> which have Range(Iter) ctor size_type find(const Iter s) const { - return qfind(*this, Range(s)); + return qfind(castToConst(), const_range_type(s)); } - // Works only for Range which have Range(Iter) ctor + // Works only for Range<(const) (unsigned) char*> which have Range(Iter) ctor size_type find(const Iter s, size_t pos) const { if (pos > size()) return std::string::npos; - size_type ret = qfind(subpiece(pos), Range(s)); + size_type ret = qfind(castToConst().subpiece(pos), const_range_type(s)); return ret == npos ? ret : ret + pos; } size_type find(value_type c) const { - return qfind(*this, c); + return qfind(castToConst(), c); } size_type rfind(value_type c) const { - return folly::rfind(*this, c); + return folly::rfind(castToConst(), c); } size_type find(value_type c, size_t pos) const { if (pos > size()) return std::string::npos; - size_type ret = qfind(subpiece(pos), c); + size_type ret = qfind(castToConst().subpiece(pos), c); return ret == npos ? ret : ret + pos; } - size_type find_first_of(Range needles) const { - return qfind_first_of(*this, needles); + size_type find_first_of(const_range_type needles) const { + return qfind_first_of(castToConst(), needles); } - size_type find_first_of(Range needles, size_t pos) const { + size_type find_first_of(const_range_type needles, size_t pos) const { if (pos > size()) return std::string::npos; - size_type ret = qfind_first_of(subpiece(pos), needles); + size_type ret = qfind_first_of(castToConst().subpiece(pos), needles); return ret == npos ? ret : ret + pos; } - // Works only for Range which have Range(Iter) ctor + // Works only for Range<(const) (unsigned) char*> which have Range(Iter) ctor size_type find_first_of(Iter needles) const { - return find_first_of(Range(needles)); + return find_first_of(const_range_type(needles)); } - // Works only for Range which have Range(Iter) ctor + // Works only for Range<(const) (unsigned) char*> which have Range(Iter) ctor size_type find_first_of(Iter needles, size_t pos) const { - return find_first_of(Range(needles), pos); + return find_first_of(const_range_type(needles), pos); } size_type find_first_of(Iter needles, size_t pos, size_t n) const { - return find_first_of(Range(needles, n), pos); + return find_first_of(const_range_type(needles, n), pos); } size_type find_first_of(value_type c) const { @@ -506,7 +525,7 @@ public: * * Note: Call find() directly if the index is needed. */ - bool contains(const Range& other) const { + bool contains(const const_range_type& other) const { return find(other) != std::string::npos; } @@ -522,8 +541,9 @@ public: /** * Does this Range start with another range? */ - bool startsWith(const Range& other) const { - return size() >= other.size() && subpiece(0, other.size()) == other; + bool startsWith(const const_range_type& other) const { + return size() >= other.size() + && castToConst().subpiece(0, other.size()) == other; } bool startsWith(value_type c) const { return !empty() && front() == c; @@ -532,8 +552,9 @@ public: /** * Does this Range end with another range? */ - bool endsWith(const Range& other) const { - return size() >= other.size() && subpiece(size() - other.size()) == other; + bool endsWith(const const_range_type& other) const { + return size() >= other.size() + && castToConst().subpiece(size() - other.size()) == other; } bool endsWith(value_type c) const { return !empty() && back() == c; @@ -543,7 +564,7 @@ public: * Remove the given prefix and return true if the range starts with the given * prefix; return false otherwise. */ - bool removePrefix(const Range& prefix) { + bool removePrefix(const const_range_type& prefix) { return startsWith(prefix) && (b_ += prefix.size(), true); } bool removePrefix(value_type prefix) { @@ -554,13 +575,73 @@ public: * Remove the given suffix and return true if the range ends with the given * suffix; return false otherwise. */ - bool removeSuffix(const Range& suffix) { + bool removeSuffix(const const_range_type& suffix) { return endsWith(suffix) && (e_ -= suffix.size(), true); } bool removeSuffix(value_type suffix) { return endsWith(suffix) && (--e_, true); } + /** + * Replaces the content of the range, starting at position 'pos', with + * contents of 'replacement'. Entire 'replacement' must fit into the + * range. Returns false if 'replacements' does not fit. Example use: + * + * char in[] = "buffer"; + * auto msp = MutablesStringPiece(input); + * EXPECT_TRUE(msp.replaceAt(2, "tt")); + * EXPECT_EQ(msp, "butter"); + * + * // not enough space + * EXPECT_FALSE(msp.replace(msp.size() - 1, "rr")); + * EXPECT_EQ(msp, "butter"); // unchanged + */ + bool replaceAt(size_t pos, const_range_type replacement) { + if (size() < pos + replacement.size()) { + return false; + } + + std::copy(replacement.begin(), replacement.end(), begin() + pos); + + return true; + } + + /** + * Replaces all occurences of 'source' with 'dest'. Returns number + * of replacements made. Source and dest have to have the same + * length. Throws if the lengths are different. If 'source' is a + * pattern that is overlapping with itself, we perform sequential + * replacement: "aaaaaaa".replaceAll("aa", "ba") --> "bababaa" + * + * Example use: + * + * char in[] = "buffer"; + * auto msp = MutablesStringPiece(input); + * EXPECT_EQ(msp.replaceAll("ff","tt"), 1); + * EXPECT_EQ(msp, "butter"); + */ + size_t replaceAll(const_range_type source, const_range_type dest) { + if (source.size() != dest.size()) { + throw std::invalid_argument( + "replacement must have the same size as source"); + } + + if (dest.empty()) { + return 0; + } + + size_t pos = 0; + size_t num_replaced = 0; + size_type found = std::string::npos; + while ((found = find(source, pos)) != std::string::npos) { + replaceAt(found, dest); + pos += source.size(); + ++num_replaced; + } + + return num_replaced; + } + /** * Splits this `Range` `[b, e)` in the position `i` dictated by the next * occurence of `delimiter`. diff --git a/folly/test/RangeTest.cpp b/folly/test/RangeTest.cpp index d088c9ce..0bf328ab 100644 --- a/folly/test/RangeTest.cpp +++ b/folly/test/RangeTest.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1042,3 +1043,123 @@ TEST(RangeFunc, CArray) { int x[] {1, 2, 3, 4}; testRangeFunc(x, 4); } + +std::string get_rand_str( + int size, std::uniform_int_distribution<>& dist, std::mt19937& gen) { + std::string ret(size, '\0'); + for (int i=0; i(dist(gen)); + } + + return ret; +} + +namespace folly { +bool operator==(MutableStringPiece mp, StringPiece sp) { + return mp.compare(sp) == 0; +} + +bool operator==(StringPiece sp, MutableStringPiece mp) { + return mp.compare(sp) == 0; +} +} + +TEST(ReplaceAt, exhaustiveTest) { + char input[] = "this is nice and long input"; + auto msp = MutableStringPiece(input); + auto str = std::string(input); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dist('a', 'z'); + + for (int i=0; i < 100; ++i) { + for (int j = 1; j <= msp.size(); ++j) { + auto replacement = get_rand_str(j, dist, gen); + for (int pos=0; pos < msp.size() - j; ++pos) { + msp.replaceAt(pos, replacement); + str.replace(pos, replacement.size(), replacement); + EXPECT_EQ(msp.compare(str), 0); + } + } + } + + // too far + EXPECT_EQ(msp.replaceAt(msp.size() - 2, StringPiece("meh")), false); +} + +TEST(ReplaceAll, basicTest) { + char input[] = "this is nice and long input"; + auto orig = std::string(input); + auto msp = MutableStringPiece(input); + + EXPECT_EQ(msp.replaceAll("is", "si"), 2); + EXPECT_EQ("thsi si nice and long input", msp); + EXPECT_EQ(msp.replaceAll("si", "is"), 2); + EXPECT_EQ(msp, orig); + + EXPECT_EQ(msp.replaceAll("abcd", "efgh"), 0); // nothing to replace + EXPECT_EQ(msp, orig); + + // at the very beginning + EXPECT_EQ(msp.replaceAll("this", "siht"), 1); + EXPECT_EQ("siht is nice and long input", msp); + EXPECT_EQ(msp.replaceAll("siht", "this"), 1); + EXPECT_EQ(msp, orig); + + // at the very end + EXPECT_EQ(msp.replaceAll("input", "soput"), 1); + EXPECT_EQ("this is nice and long soput", msp); + EXPECT_EQ(msp.replaceAll("soput", "input"), 1); + EXPECT_EQ(msp, orig); + + // all spaces + EXPECT_EQ(msp.replaceAll(" ", "@"), 5); + EXPECT_EQ("this@is@nice@and@long@input", msp); + EXPECT_EQ(msp.replaceAll("@", " "), 5); + EXPECT_EQ(msp, orig); +} + +TEST(ReplaceAll, randomTest) { + char input[] = "abcdefghijklmnoprstuwqz"; // no pattern repeata inside + auto orig = std::string(input); + auto msp = MutableStringPiece(input); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dist('A', 'Z'); + + for (int i=0; i < 100; ++i) { + for (int j = 1; j <= orig.size(); ++j) { + auto replacement = get_rand_str(j, dist, gen); + for (int pos=0; pos < msp.size() - j; ++pos) { + auto piece = orig.substr(pos, j); + EXPECT_EQ(msp.replaceAll(piece, replacement), 1); + EXPECT_EQ(msp.find(replacement), pos); + EXPECT_EQ(msp.replaceAll(replacement, piece), 1); + EXPECT_EQ(msp, orig); + } + } + } +} + +TEST(ReplaceAll, BadArg) { + int count = 0; + auto fst = "longer"; + auto snd = "small"; + char input[] = "meh meh meh"; + auto all = MutableStringPiece(input); + + try { + all.replaceAll(fst, snd); + } catch (std::invalid_argument&) { + ++count; + } + + try { + all.replaceAll(snd, fst); + } catch (std::invalid_argument&) { + ++count; + } + + EXPECT_EQ(count, 2); +}