/*
 * Copyright 2016 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #include <folly/Uri.h>
20 #include <boost/regex.hpp>
26 fbstring submatch(const boost::cmatch& m, size_t idx) {
28 return fbstring(sub.first, sub.second);
/**
 * Lower-cases the ASCII letters of `s` in place.
 *
 * Only 'A'..'Z' are mapped; every other character is left untouched. The
 * mapping is done arithmetically instead of through <cctype> so that the
 * result does not depend on the process locale and no negative `char` value
 * is ever handed to a <cctype> function.
 *
 * @tparam String  A mutable, range-iterable string type exposing `value_type`.
 * @param  s       The string to modify.
 */
template <class String>
void toLower(String& s) {
  using Char = typename String::value_type;
  for (auto& c : s) {
    if (c >= 'A' && c <= 'Z') {
      c = static_cast<Char>(c + ('a' - 'A'));
    }
  }
}
40 Uri::Uri(StringPiece str) : hasAuthority_(false), port_(0) {
41 static const boost::regex uriRegex(
42 "([a-zA-Z][a-zA-Z0-9+.-]*):" // scheme:
43 "([^?#]*)" // authority and path
44 "(?:\\?([^#]*))?" // ?query
45 "(?:#(.*))?"); // #fragment
46 static const boost::regex authorityAndPathRegex("//([^/]*)(/.*)?");
49 if (UNLIKELY(!boost::regex_match(str.begin(), str.end(), match, uriRegex))) {
50 throw std::invalid_argument(to<std::string>("invalid URI ", str));
53 scheme_ = submatch(match, 1);
56 StringPiece authorityAndPath(match[2].first, match[2].second);
57 boost::cmatch authorityAndPathMatch;
58 if (!boost::regex_match(authorityAndPath.begin(),
59 authorityAndPath.end(),
60 authorityAndPathMatch,
61 authorityAndPathRegex)) {
62 // Does not start with //, doesn't have authority
63 hasAuthority_ = false;
64 path_ = authorityAndPath.fbstr();
66 static const boost::regex authorityRegex(
67 "(?:([^@:]*)(?::([^@]*))?@)?" // username, password
68 "(\\[[^\\]]*\\]|[^\\[:]*)" // host (IP-literal (e.g. '['+IPv6+']',
69 // dotted-IPv4, or named host)
70 "(?::(\\d*))?"); // port
72 auto authority = authorityAndPathMatch[1];
73 boost::cmatch authorityMatch;
74 if (!boost::regex_match(authority.first,
78 throw std::invalid_argument(
79 to<std::string>("invalid URI authority ",
80 StringPiece(authority.first, authority.second)));
83 StringPiece port(authorityMatch[4].first, authorityMatch[4].second);
85 port_ = to<uint16_t>(port);
89 username_ = submatch(authorityMatch, 1);
90 password_ = submatch(authorityMatch, 2);
91 host_ = submatch(authorityMatch, 3);
92 path_ = submatch(authorityAndPathMatch, 2);
95 query_ = submatch(match, 3);
96 fragment_ = submatch(match, 4);
99 fbstring Uri::authority() const {
102 // Port is 5 characters max and we have up to 3 delimiters.
103 result.reserve(host().size() + username().size() + password().size() + 8);
105 if (!username().empty() || !password().empty()) {
106 result.append(username());
108 if (!password().empty()) {
109 result.push_back(':');
110 result.append(password());
113 result.push_back('@');
116 result.append(host());
119 result.push_back(':');
120 toAppend(port(), &result);
126 fbstring Uri::hostname() const {
127 if (host_.size() > 0 && host_[0] == '[') {
128 // If it starts with '[', then it should end with ']', this is ensured by
130 return host_.substr(1, host_.size() - 2);
135 const std::vector<std::pair<fbstring, fbstring>>& Uri::getQueryParams() {
136 if (!query_.empty() && queryParams_.empty()) {
137 // Parse query string
138 static const boost::regex queryParamRegex(
139 "(^|&)" /*start of query or start of parameter "&"*/
140 "([^=&]*)=?" /*parameter name and "=" if value is expected*/
141 "([^=&]*)" /*parameter value*/
142 "(?=(&|$))" /*forward reference, next should be end of query or
143 start of next parameter*/);
144 boost::cregex_iterator paramBeginItr(
145 query_.data(), query_.data() + query_.size(), queryParamRegex);
146 boost::cregex_iterator paramEndItr;
147 for (auto itr = paramBeginItr; itr != paramEndItr; itr++) {
148 if (itr->length(2) == 0) {
149 // key is empty, ignore it
152 queryParams_.emplace_back(
153 fbstring((*itr)[2].first, (*itr)[2].second), // parameter name
154 fbstring((*itr)[3].first, (*itr)[3].second) // parameter value