/*
 * Copyright 2014 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #ifndef FOLLY_GEN_STRING_H
18 #error This file may only be included from folly/gen/String.h
21 #include "folly/Conv.h"
22 #include "folly/String.h"
23 #include "folly/io/IOBuf.h"
29 inline bool splitPrefix(StringPiece& in,
31 StringPiece delimiter) {
32 auto p = in.find(delimiter);
33 if (p != std::string::npos) {
34 prefix.assign(in.data(), in.data() + p);
35 in.advance(p + delimiter.size());
43 * Split by any of the EOL terms: \r, \n, or \r\n.
45 inline bool splitPrefix(StringPiece& in,
48 auto newline = "\r\n";
49 auto p = in.find_first_of(newline);
50 if (p != std::string::npos) {
51 prefix.assign(in.data(), in.data() + p);
53 if (!in.removePrefix(newline)) {
62 inline bool splitPrefix(StringPiece& in, StringPiece& prefix, char delimiter) {
63 auto p = static_cast<const char*>(memchr(in.data(), delimiter, in.size()));
65 prefix.assign(in.data(), p);
66 in.assign(p + 1, in.end());
/**
 * Reinterpret a pointer to unsigned bytes as a pointer to char, so byte
 * buffers can be handed to char-based string types. The address is unchanged.
 */
inline const char* ch(const unsigned char* p) {
  return reinterpret_cast<const char*>(p);
}
77 class StringResplitter : public Operator<StringResplitter> {
80 explicit StringResplitter(char delimiter) : delimiter_(delimiter) { }
82 template <class Source>
83 class Generator : public GenImpl<StringPiece, Generator<Source>> {
87 Generator(Source source, char delimiter)
88 : source_(std::move(source)), delimiter_(delimiter) { }
91 bool apply(Body&& body) const {
92 std::unique_ptr<IOBuf> buffer;
94 auto fn = [&](StringPiece in) -> bool {
96 bool found = splitPrefix(in, prefix, this->delimiter_);
97 if (found && buffer && buffer->length() != 0) {
98 // Append to end of buffer, return line
99 if (!prefix.empty()) {
100 buffer->reserve(0, prefix.size());
101 memcpy(buffer->writableTail(), prefix.data(), prefix.size());
102 buffer->append(prefix.size());
104 if (!body(StringPiece(ch(buffer->data()), buffer->length()))) {
108 found = splitPrefix(in, prefix, this->delimiter_);
110 // Buffer is empty, return lines directly from input (no buffer)
115 found = splitPrefix(in, prefix, this->delimiter_);
118 // Incomplete line left, append to buffer
120 // Arbitrarily assume that we have half a line and get enough
121 // room for twice that.
122 constexpr size_t kDefaultLineSize = 256;
123 buffer = IOBuf::create(std::max(kDefaultLineSize, 2 * in.size()));
125 buffer->reserve(0, in.size());
126 memcpy(buffer->writableTail(), in.data(), in.size());
127 buffer->append(in.size());
133 if (!source_.apply(std::move(fn))) {
137 // Incomplete last line
138 if (buffer && buffer->length() != 0) {
139 if (!body(StringPiece(ch(buffer->data()), buffer->length()))) {
146 static constexpr bool infinite = Source::infinite;
149 template<class Source,
151 class Gen = Generator<Source>>
152 Gen compose(GenImpl<Value, Source>&& source) const {
153 return Gen(std::move(source.self()), delimiter_);
156 template<class Source,
158 class Gen = Generator<Source>>
159 Gen compose(const GenImpl<Value, Source>& source) const {
160 return Gen(source.self(), delimiter_);
164 template <class DelimiterType = char>
165 class SplitStringSource
166 : public GenImpl<StringPiece, SplitStringSource<DelimiterType>> {
168 DelimiterType delimiter_;
170 SplitStringSource(const StringPiece& source,
171 DelimiterType delimiter)
173 , delimiter_(std::move(delimiter)) { }
175 template <class Body>
176 bool apply(Body&& body) const {
177 StringPiece rest(source_);
179 while (splitPrefix(rest, prefix, this->delimiter_)) {
194 * Unsplit - For joining tokens from a generator into a string. This is
195 * the inverse of `split` above.
197 * This type is primarily used through the 'unsplit' function.
199 template<class Delimiter,
201 class Unsplit : public Operator<Unsplit<Delimiter, Output>> {
202 Delimiter delimiter_;
204 explicit Unsplit(const Delimiter& delimiter)
205 : delimiter_(delimiter) {
208 template<class Source,
210 Output compose(const GenImpl<Value, Source>& source) const {
212 UnsplitBuffer<Delimiter, Output> unsplitter(delimiter_, &outputBuffer);
213 unsplitter.compose(source);
219 * UnsplitBuffer - For joining tokens from a generator into a string,
220 * and inserting them into a custom buffer.
222 * This type is primarily used through the 'unsplit' function.
224 template<class Delimiter,
226 class UnsplitBuffer : public Operator<UnsplitBuffer<Delimiter, OutputBuffer>> {
227 Delimiter delimiter_;
228 OutputBuffer* outputBuffer_;
230 UnsplitBuffer(const Delimiter& delimiter, OutputBuffer* outputBuffer)
231 : delimiter_(delimiter)
232 , outputBuffer_(outputBuffer) {
236 template<class Source,
238 void compose(const GenImpl<Value, Source>& source) const {
239 // If the output buffer is empty, we skip inserting the delimiter for the
241 bool skipDelim = outputBuffer_->empty();
242 source | [&](Value v) {
245 toAppend(std::forward<Value>(v), outputBuffer_);
247 toAppend(delimiter_, std::forward<Value>(v), outputBuffer_);
/**
 * Hack for static for-like constructs
 *
 * Returns its argument unchanged. The second, unused template parameter lets
 * a call site mention a parameter pack element purely to drive a pack
 * expansion, e.g. passthrough<T&, Ts>(x)... yields one T& per element of Ts.
 */
template<class Target, class=void>
inline Target passthrough(Target target) { return target; }
260 #pragma GCC diagnostic push
262 // Clang isn't happy with eatField() hack below.
263 #pragma GCC diagnostic ignored "-Wreturn-stack-address"
267 * ParseToTuple - For splitting a record and immediatlely converting it to a
268 * target tuple type. Primary used through the 'eachToTuple' helper, like so:
271 * = split("1:a 2:b", ' ')
272 * | eachToTuple<int, string>()
273 * | as<vector<tuple<int, string>>>();
276 template<class TargetContainer,
280 Delimiter delimiter_;
282 explicit SplitTo(Delimiter delimiter)
283 : delimiter_(delimiter) {}
285 TargetContainer operator()(StringPiece line) const {
287 StringPiece fields[sizeof...(Targets)];
288 // HACK(tjackson): Used for referencing fields[] corresponding to variadic
289 // template parameters.
290 auto eatField = [&]() -> StringPiece& { return fields[i++]; };
291 if (!split(delimiter_,
293 detail::passthrough<StringPiece&, Targets>(eatField())...)) {
294 throw std::runtime_error("field count mismatch");
297 return TargetContainer(To<Targets>()(eatField())...);
301 #pragma GCC diagnostic pop
303 } // namespace detail