Defer query string parsing from ctor to getQueryParams()
authorJun LI <albertli@fb.com>
Wed, 15 Oct 2014 20:53:51 +0000 (13:53 -0700)
committerdcsommer <dcsommer@fb.com>
Fri, 17 Oct 2014 18:43:34 +0000 (11:43 -0700)
Summary:
Query string parsing uses a lot of CPU, it happens in ctor killing CPU
even in users that are not interested in query string. Move it to
getQueryParams().

Test Plan:
fbconfig folly/test
fbmake runtests_dbg

Reviewed By: ldemailly@fb.com

Subscribers: trunkagent, njormrod, zellux

FB internal diff: D1604973

Tasks: 5304484

Blame Revision: https://phabricator.fb.com/D1455158

folly/Uri.cpp
folly/Uri.h
folly/test/UriTest.cpp

index f7bf80fafeec1a8fbcca141082284bee316e678a..f4618d8750b792785a36dca071e235611cd53297 100644 (file)
@@ -91,26 +91,6 @@ Uri::Uri(StringPiece str) : port_(0) {
   }
 
   query_ = submatch(match, 3);
-  if (!query_.empty()) {
-    // Parse query string
-    static const boost::regex queryParamRegex(
-      "(^|&)([^=&]*)=?([^=&]*)(?=(&|$))");
-    boost::cregex_iterator paramBeginItr(
-      match[3].first,
-      match[3].second,
-      queryParamRegex);
-    boost::cregex_iterator paramEndItr;
-    for(auto itr = paramBeginItr; itr != paramEndItr; itr++) {
-      if (itr->length(2) == 0) {
-        // key is empty, ignore it
-        continue;
-      }
-      queryParams_.emplace_back(
-        fbstring((*itr)[2].first, (*itr)[2].second), // parameter name
-        fbstring((*itr)[3].first, (*itr)[3].second)  // parameter value
-      );
-    }
-  }
   fragment_ = submatch(match, 4);
 }
 
@@ -150,4 +130,30 @@ fbstring Uri::hostname() const {
   return host_;
 }
 
+const std::vector<std::pair<fbstring, fbstring>>& Uri::getQueryParams() {
+  if (!query_.empty() && queryParams_.empty()) {
+    // Parse query string
+    static const boost::regex queryParamRegex(
+        "(^|&)" /*start of query or start of parameter "&"*/
+        "([^=&]*)=?" /*parameter name and "=" if value is expected*/
+        "([^=&]*)" /*parameter value*/
+        "(?=(&|$))" /*forward reference, next should be end of query or
+                      start of next parameter*/);
+    boost::cregex_iterator paramBeginItr(
+        query_.data(), query_.data() + query_.size(), queryParamRegex);
+    boost::cregex_iterator paramEndItr;
+    for (auto itr = paramBeginItr; itr != paramEndItr; itr++) {
+      if (itr->length(2) == 0) {
+        // key is empty, ignore it
+        continue;
+      }
+      queryParams_.emplace_back(
+          fbstring((*itr)[2].first, (*itr)[2].second), // parameter name
+          fbstring((*itr)[3].first, (*itr)[3].second) // parameter value
+          );
+    }
+  }
+  return queryParams_;
+}
+
 }  // namespace folly
index 581307d928d8315e3ab417a38fa1345159ef048c..3d075d578c47a0c3a27bb02585601a4efdb5af94 100644 (file)
@@ -90,13 +90,15 @@ class Uri {
    * one equal signs, we don't know which one is the delimiter for key and
    * value.
    *
+   * Note, this method is not thread safe, it might update internal state, but
+   * only the first call to this method update the state. After the first call
+   * is finished, subsequent calls to this method are thread safe.
+   *
    * @return  query parameter key-value pairs in a vector, each element is a
    *          pair of which the first element is parameter name and the second
    *          one is parameter value
    */
-  const std::vector<std::pair<fbstring, fbstring>>& getQueryParams() const {
-    return queryParams_;
-  };
+  const std::vector<std::pair<fbstring, fbstring>>& getQueryParams();
 
  private:
   fbstring scheme_;
index 7f8faee8847d5ad2728b9f94f104a53407f85a58..d16242d60f373af3d0c84e1ff018b036964aa867 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <folly/Uri.h>
+#include <folly/Benchmark.h>
 
 #include <boost/algorithm/string.hpp>
 #include <glog/logging.h>
@@ -389,3 +390,74 @@ TEST(Uri, Simple) {
     }
   }
 }
+
+/**
+ * Result of benchmark varies by the complexity of query.
+ * ============================================================================
+ * folly/test/UriTest.cpp                          relative  time/iter  iters/s
+ * ============================================================================
+ * init_uri_simple                                              4.88us  204.80K
+ * init_uri_simple_with_query_parsing                          22.46us   44.52K
+ * init_uri_complex                                             5.92us  168.85K
+ * init_uri_complex_with_query_parsing                         48.70us   20.53K
+ * ============================================================================
+ */
+BENCHMARK(init_uri_simple, iters) {
+  const fbstring s("http://localhost?&key1=foo&key2=&key3&=bar&=bar=&");
+  for (int i = 0; i < iters; ++i) {
+    Uri u(s);
+  }
+}
+
+BENCHMARK(init_uri_simple_with_query_parsing, iters) {
+  const fbstring s("http://localhost?&key1=foo&key2=&key3&=bar&=bar=&");
+  for (int i = 0; i < iters; ++i) {
+    Uri u(s);
+    u.getQueryParams();
+  }
+}
+
+BENCHMARK(init_uri_complex, iters) {
+  const fbstring s(
+      "https://mock.example.com/farm/track.php?TmOxQUDF=uSmTS_VwhjKnh_JME&DI"
+      "h=fbbN&GRsoIm=bGshjaUqavZxQai&UMT=36k18N4dn21&3U=CD8o4A4497W152j6m0V%14"
+      "%57&Hy=t%05mpr.80JUZ7ne_%23zS8DcA%0qc_%291ymamz096%11Zfb3r%09ZqPD%311ZX"
+      "tqJd600ot&5U96U-Rh-VZ=-D_6-9xKYj%1gW6b43s1B9-j21P0oUW5-t46G4kgt&ezgj=mcW"
+      "TTQ.c&Oh=%2PblUfuC%7C997048884827569%03xnyJ%2L1pi7irBioQ6D4r7nNHNdo6v7Y%"
+      "84aurnSJ%2wCFePHMlGZmIHGfCe7392_lImWsSvN&sBeNN=Nf%80yOE%6X10M64F4gG197aX"
+      "R2B4g2533x235A0i4e%57%58uWB%04Erw.60&VMS4=Ek_%02GC0Pkx%6Ov_%207WICUz007%"
+      "04nYX8N%46zzpv%999h&KGmBt988y=q4P57C-Dh-Nz-x_7-5oPxz%1gz3N03t6c7-R67N4DT"
+      "Y6-f98W1&Lts&%02dOty%8eEYEnLz4yexQQLnL4MGU2JFn3OcmXcatBcabZgBdDdy67hdgW"
+      "tYn4");
+  for (int i = 0; i < iters; ++i) {
+    Uri u(s);
+  }
+}
+
+BENCHMARK(init_uri_complex_with_query_parsing, iters) {
+  const fbstring s(
+      "https://mock.example.com/farm/track.php?TmOxQUDF=uSmTS_VwhjKnh_JME&DI"
+      "h=fbbN&GRsoIm=bGshjaUqavZxQai&UMT=36k18N4dn21&3U=CD8o4A4497W152j6m0V%14"
+      "%57&Hy=t%05mpr.80JUZ7ne_%23zS8DcA%0qc_%291ymamz096%11Zfb3r%09ZqPD%311ZX"
+      "tqJd600ot&5U96U-Rh-VZ=-D_6-9xKYj%1gW6b43s1B9-j21P0oUW5-t46G4kgt&ezgj=mcW"
+      "TTQ.c&Oh=%2PblUfuC%7C997048884827569%03xnyJ%2L1pi7irBioQ6D4r7nNHNdo6v7Y%"
+      "84aurnSJ%2wCFePHMlGZmIHGfCe7392_lImWsSvN&sBeNN=Nf%80yOE%6X10M64F4gG197aX"
+      "R2B4g2533x235A0i4e%57%58uWB%04Erw.60&VMS4=Ek_%02GC0Pkx%6Ov_%207WICUz007%"
+      "04nYX8N%46zzpv%999h&KGmBt988y=q4P57C-Dh-Nz-x_7-5oPxz%1gz3N03t6c7-R67N4DT"
+      "Y6-f98W1&Lts&%02dOty%8eEYEnLz4yexQQLnL4MGU2JFn3OcmXcatBcabZgBdDdy67hdgW"
+      "tYn4");
+  for (int i = 0; i < iters; ++i) {
+    Uri u(s);
+    u.getQueryParams();
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  auto r = RUN_ALL_TESTS();
+  if (r) {
+    return r;
+  }
+  runBenchmarks();
+  return 0;
+}