2 * Copyright 2013 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 // @author Nicholas Ormrod <njormrod@fb.com>
19 /******************************************************************************
21 * This file is not perfect - benchmarking is a finicky process.
23 * TAKE THE NUMBERS YOU SEE TO MIND, NOT TO HEART
25 *****************************************************************************/
28 #include "OFBVector.h"
29 #define FOLLY_BENCHMARK_USE_NS_IFOLLY
30 #include "folly/FBVector.h"
40 #include <boost/preprocessor.hpp>
44 static const bool enableColors = true;
46 //=============================================================================
47 // use the timestamp counter for time measurements
50 void clear_icache() {} // placeholder
52 // return the CPU timestamp counter
53 static uint64_t readTSC() {
54 unsigned reslo, reshi;
56 __asm__ __volatile__ (
57 "xorl %%eax,%%eax \n cpuid \n"
58 ::: "%eax", "%ebx", "%ecx", "%edx");
59 __asm__ __volatile__ (
61 : "=a" (reslo), "=d" (reshi) );
62 __asm__ __volatile__ (
63 "xorl %%eax,%%eax \n cpuid \n"
64 ::: "%eax", "%ebx", "%ecx", "%edx");
66 return ((uint64_t)reshi << 32) | reslo;
69 //=============================================================================
72 // The TIME* macros expand to a sequence of functions and classes whose aim
73 // is to run a benchmark function several times with different types and
76 // The first and last thing that TIME* expands to is a function declaration,
77 // through the DECL macro. The declared function is templated on the full
78 // vector type, its value_type, its allocator_type, and a number N.
79 // The first DECL is a forward declaration, and is followed by a
80 // semicolon. The second DECL ends the macro - a function body is to be
81 // supplied after the macro invocation.
82 // The declared function returns a uint64_t, which is assumed to be a time.
84 // The GETTER macro calls the DECL function repeatedly. It returns the
85 // minimum time taken to execute DECL. GETTER runs DECL between 2 and 100
86 // times (it will not run the full 100 if the tests take a long time).
88 // The EVALUATOR macro calls the GETTER macro with each of std::vector,
89 // the original fbvector (folly::fbvector), and the new fbvector
90 // (Ifolly::fbvector). It runs all three twice, and then calls the
91 // pretty printer to display the results. Before calling the pretty
92 // printer, the EVALUATOR outputs the three message strings.
94 // The EXECUTOR macro calls the EVALUATOR with different values of N.
95 // It also defines the string message for N.
97 // The RUNNER macro defines a struct. That struct defines the testname
98 // string. The constructor calls the EXECUTOR with each test type, and
99 // also defines the test type string.
100 // The RUNNER class is also instantiated, so the constructor will be run
101 // before entering main().
104 #define TIME(str, types) TIME_I(str, types, (0))
105 #define TIME_N(str, types) TIME_I(str, types, (0)(16)(64)(1024)(16384)(262144))
107 #define TIME_I(str, types, values) \
108 TIME_II(str, BOOST_PP_CAT(t_, __LINE__), types, values)
110 #define TIME_II(str, name, types, values) \
114 EXECUTOR(name, values) \
115 RUNNER(str, name, types) \
119 template <class Vector, typename T, typename Allocator, int N> \
120 static inline uint64_t BOOST_PP_CAT(run_, name) ()
122 #define GETTER(name) \
123 template <class Vector, int N> \
124 static uint64_t BOOST_PP_CAT(get_, name) () { \
125 auto s = chrono::high_resolution_clock::now(); \
126 uint64_t minticks = ~uint64_t(0); \
128 for (; burst < 100; ++burst) { \
129 auto t = BOOST_PP_CAT(run_, name) <Vector, \
130 typename Vector::value_type, typename Vector::allocator_type, N> (); \
131 minticks = min(minticks, t); \
132 if (minticks * burst > 10000000) break; \
134 auto e = chrono::high_resolution_clock::now(); \
135 chrono::nanoseconds d(e - s); \
137 return d.count() / burst; \
140 #define EVALUATOR(name) \
141 template <typename T, typename Allocator, int N> \
142 void BOOST_PP_CAT(evaluate_, name) \
143 ( string& part1, string& part2, string& part3 ) { \
144 cout << setw(25) << left << part1 \
145 << setw(4) << left << part2 \
146 << setw(6) << right << part3; \
147 part1.clear(); part2.clear(); part3.clear(); \
148 auto v1 = BOOST_PP_CAT(get_, name) \
149 <Ifolly::fbvector<T, Allocator>, N> (); \
150 auto v2 = BOOST_PP_CAT(get_, name) \
151 < folly::fbvector<T, Allocator>, N> (); \
152 auto v3 = BOOST_PP_CAT(get_, name) \
153 < std:: vector<T, Allocator>, N> (); \
154 auto v1b = BOOST_PP_CAT(get_, name) \
155 <Ifolly::fbvector<T, Allocator>, N> (); \
156 auto v2b = BOOST_PP_CAT(get_, name) \
157 < folly::fbvector<T, Allocator>, N> (); \
158 auto v3b = BOOST_PP_CAT(get_, name) \
159 < std:: vector<T, Allocator>, N> (); \
160 prettyPrint(min(v1, v1b), min(v2, v2b), min(v3, v3b)); \
164 #define EXECUTOR(name, values) \
165 template <typename T, typename Allocator> \
166 void BOOST_PP_CAT(execute_, name) ( string& part1, string& part2 ) { \
167 BOOST_PP_SEQ_FOR_EACH(EVALUATE, name, values) \
170 #define EVALUATE(r, name, value) \
171 { string part3(BOOST_PP_STRINGIZE(value)); \
172 BOOST_PP_CAT(evaluate_, name) <T, Allocator, value> \
173 ( part1, part2, part3 ); }
175 #define RUNNER(str, name, types) \
176 struct BOOST_PP_CAT(Runner_, name) { \
177 BOOST_PP_CAT(Runner_, name) () { \
179 BOOST_PP_SEQ_FOR_EACH(EXECUTE, (part1, name), types) \
181 } BOOST_PP_CAT(runner_, name);
183 #define EXECUTE(r, pn, type) \
184 { string part2(BOOST_PP_STRINGIZE(type)); \
185 BOOST_PP_CAT(execute_, BOOST_PP_TUPLE_ELEM(2, 1, pn)) \
186 <typename type::first_type, typename type::second_type> \
187 ( BOOST_PP_TUPLE_ELEM(2, 0, pn), part2 ); }
189 //=============================================================================
192 // The pretty printer displays the times for each of the three vectors.
193 // The fastest time (or times, if there is a tie) is highlighted in green.
194 // Additionally, if the new fbvector (time v1) is not the fastest, then
195 // it is highlighted with red or blue. It is highlighted with blue only
196 // if it lost by a small margin (5 clock cycles or 2%, whichever is
199 void prettyPrint(uint64_t v1, uint64_t v2, uint64_t v3) {
200 // rdtsc takes some time to run; about 157 clock cycles
201 // if we see a smaller positive number, adjust readtsc
202 uint64_t readtsc_time = 157;
203 if (v1 != 0 && v1 < readtsc_time) readtsc_time = v1;
204 if (v2 != 0 && v2 < readtsc_time) readtsc_time = v2;
205 if (v3 != 0 && v3 < readtsc_time) readtsc_time = v3;
207 if (v1 == 0) v1 = ~uint64_t(0); else v1 -= readtsc_time;
208 if (v2 == 0) v2 = ~uint64_t(0); else v2 -= readtsc_time;
209 if (v3 == 0) v3 = ~uint64_t(0); else v3 -= readtsc_time;
211 auto least = min({ v1, v2, v3 });
212 // a good time is less than 2% or 5 clock cycles slower
213 auto good = max(least + 5, (uint64_t)(least * 1.02));
215 string w("\x1b[1;;42m"); // green
216 string g("\x1b[1;;44m"); // blue
217 string b("\x1b[1;;41m"); // red
218 string e("\x1b[0m"); // reset
226 if (v1 == least) cout << w;
227 else if (v1 <= good) cout << g;
229 cout << setw(13) << right;
230 if (v1 == ~uint64_t(0)) cout << "-"; else cout << v1;
231 cout << " " << e << " ";
233 if (v2 == least) cout << w;
234 cout << setw(13) << right;
235 if (v2 == ~uint64_t(0)) cout << "-"; else cout << v2;
236 cout << " " << e << " ";
238 if (v3 == least) cout << w;
239 cout << setw(13) << right;
240 if (v3 == ~uint64_t(0)) cout << "-"; else cout << v3;
241 cout << " " << e << " ";
244 //=============================================================================
247 // Much like the TIME macros, the Leader and Line struct/macros
248 // instantiate a class before main, and work is done inside the
249 // constructors. The Leader and Line struct print out pretty
250 // table boundaries and titles.
252 uint64_t leader_elapsed() {
253 static auto t = chrono::high_resolution_clock::now();
254 chrono::nanoseconds d(chrono::high_resolution_clock::now() - t);
255 return d.count() / 1000000000;
261 std::cout.imbue(std::locale(""));
264 cout << "========================================"
265 << "========================================" << endl;
266 cout << setw(35) << left << "Test";
267 cout << setw(15) << right << "new fbvector ";
268 cout << setw(15) << right << "old fbvector ";
269 cout << setw(15) << right << "std::vector ";
271 cout << "========================================"
272 << "========================================" << endl;
276 cout << "========================================"
277 << "========================================" << endl;
278 cout << setw(78) << right << leader_elapsed() << " s" << endl;
283 explicit Line(string text) {
284 cout << "\n--- " << text << " ---" << endl;
287 #define SECTION(str) Line BOOST_PP_CAT(l_, __LINE__) ( str )
289 //=============================================================================
292 typedef pair<int, std::allocator<int>> T1;
293 typedef pair<vector<int>, std::allocator<vector<int>>> T2;
295 uint64_t v1_T1 = 0, v2_T1 = 0, v3_T1 = 0;
296 uint64_t v1_T2 = 0, v2_T2 = 0, v3_T2 = 0;
298 #define BASICS (T1)(T2)
300 //=============================================================================
301 // prevent optimizing
303 std::vector<int> O_vi(10000000);
311 for (int i = 0; i < s; ++i) O(v[i]);
314 //=============================================================================
318 //-----------------------------------------------------------------------------
321 //#define BASICS (T1)
326 //-----------------------------------------------------------------------------
327 SECTION("Essentials");
329 TIME_N("~Vector()", BASICS) {
332 clear_icache(); auto b = readTSC();
340 TIME_N("a.clear()", BASICS) {
343 clear_icache(); auto b = readTSC();
350 TIME("Vector u", BASICS) {
351 clear_icache(); auto b = readTSC();
358 TIME_N("Vector u(a)", BASICS) {
359 static const Vector a(N);
360 clear_icache(); auto b = readTSC();
367 TIME_N("Vector u(move(a))", BASICS) {
369 clear_icache(); auto b = readTSC();
376 //-----------------------------------------------------------------------------
377 SECTION("Constructors");
379 TIME_N("Vector u(n)", BASICS) {
380 clear_icache(); auto b = readTSC();
387 TIME_N("Vector u(n, t)", BASICS) {
389 clear_icache(); auto b = readTSC();
396 TIME_N("Vector u(first, last)", BASICS) {
397 static const deque<T> d(N);
398 clear_icache(); auto b = readTSC();
399 Vector u(d.begin(), d.end());
405 //-----------------------------------------------------------------------------
406 SECTION("Assignment");
408 TIME_N("a = b", BASICS) {
410 static const Vector c(N/2 + 10);
412 clear_icache(); auto b = readTSC();
419 TIME_N("a = move(b)", BASICS) {
424 clear_icache(); auto b = readTSC();
432 TIME_N("a = destructive_move(b)", BASICS) {
437 clear_icache(); auto b = readTSC();
446 TIME_N("a.assign(N, t)", BASICS) {
450 clear_icache(); auto b = readTSC();
457 TIME_N("a.assign(first, last)", BASICS) {
458 static const deque<T> d(N);
460 clear_icache(); auto b = readTSC();
461 a.assign(d.begin(), d.end());
467 TIME_N("a.swap(b)", BASICS) {
472 clear_icache(); auto b = readTSC();
480 //-----------------------------------------------------------------------------
481 SECTION("Iterators");
483 TIME("a.begin()", BASICS) {
485 clear_icache(); auto b = readTSC();
492 TIME("a.cbegin()", BASICS) {
494 clear_icache(); auto b = readTSC();
501 TIME("a.rbegin()", BASICS) {
503 clear_icache(); auto b = readTSC();
510 //-----------------------------------------------------------------------------
513 TIME_N("a.size()", BASICS) {
514 static const Vector a(N);
515 clear_icache(); auto b = readTSC();
522 TIME("a.max_size()", BASICS) {
524 clear_icache(); auto b = readTSC();
525 int n = a.max_size();
531 TIME_N("a.capacity()", BASICS) {
532 static const Vector a(N);
533 clear_icache(); auto b = readTSC();
534 int n = a.capacity();
540 TIME_N("a.empty()", BASICS) {
541 static const Vector a(N);
542 clear_icache(); auto b = readTSC();
549 TIME_N("reserve(n)", BASICS) {
552 clear_icache(); auto b = readTSC();
559 TIME_N("resize(n)", BASICS) {
562 clear_icache(); auto b = readTSC();
569 TIME_N("resize(n, t)", BASICS) {
573 clear_icache(); auto b = readTSC();
580 TIME("staged reserve", BASICS) {
583 clear_icache(); auto b = readTSC();
591 TIME("staged resize", BASICS) {
594 clear_icache(); auto b = readTSC();
602 TIME("resize then reserve", BASICS) {
605 clear_icache(); auto b = readTSC();
613 TIME("shrink", BASICS) {
618 clear_icache(); auto b = readTSC();
625 //-----------------------------------------------------------------------------
628 TIME("operator[]", BASICS) {
629 static const Vector a(10);
630 clear_icache(); auto b = readTSC();
631 const auto& v = a[8];
637 TIME("at()", BASICS) {
638 static const Vector a(10);
639 clear_icache(); auto b = readTSC();
640 const auto& v = a.at(8);
646 TIME("front()", BASICS) {
647 static const Vector a(10);
648 clear_icache(); auto b = readTSC();
649 const auto& v = a.front();
655 TIME("back()", BASICS) {
656 static const Vector a(10);
657 clear_icache(); auto b = readTSC();
658 const auto& v = a.back();
664 TIME("data()", BASICS) {
665 static const Vector a(10);
666 clear_icache(); auto b = readTSC();
667 const auto& v = a.data();
673 //-----------------------------------------------------------------------------
676 TIME("reserved emplace", BASICS) {
680 clear_icache(); auto b = readTSC();
687 TIME("full emplace", BASICS) {
690 clear_icache(); auto b = readTSC();
697 TIME("reserved push_back", BASICS) {
702 clear_icache(); auto b = readTSC();
709 TIME("full push_back", BASICS) {
713 clear_icache(); auto b = readTSC();
720 TIME("reserved push_back&&", BASICS) {
725 clear_icache(); auto b = readTSC();
726 u.push_back(std::move(t));
732 TIME("full push_back&&", BASICS) {
736 clear_icache(); auto b = readTSC();
737 u.push_back(std::move(t));
743 TIME("reserved push emplace", BASICS) {
747 clear_icache(); auto b = readTSC();
754 TIME("full push emplace", BASICS) {
757 clear_icache(); auto b = readTSC();
764 TIME_N("bulk append", BASICS) {
765 static deque<T> d(N);
768 clear_icache(); auto b = readTSC();
769 u.insert(u.end(), d.begin(), d.end());
775 TIME_N("erase end", BASICS) {
780 if (it != u.end()) O(*it);
781 clear_icache(); auto b = readTSC();
782 u.erase(it, u.end());
788 TIME("pop_back", BASICS) {
791 clear_icache(); auto b = readTSC();
798 //-----------------------------------------------------------------------------
799 SECTION("Insert/Erase - Bad Ops");
801 TIME("insert", BASICS) {
809 clear_icache(); auto b = readTSC();
816 TIME("insert&&", BASICS) {
824 clear_icache(); auto b = readTSC();
825 u.insert(it, std::move(t));
831 TIME("insert n few", BASICS) {
839 clear_icache(); auto b = readTSC();
846 TIME("insert n many", BASICS) {
854 clear_icache(); auto b = readTSC();
855 u.insert(it, 200, t);
861 TIME("iterator insert few", BASICS) {
862 static deque<T> d(10);
870 clear_icache(); auto b = readTSC();
871 u.insert(it, d.begin(), d.end());
877 TIME("iterator insert many", BASICS) {
878 static deque<T> d(200);
886 clear_icache(); auto b = readTSC();
887 u.insert(it, d.begin(), d.end());
893 TIME("erase", BASICS) {
899 clear_icache(); auto b = readTSC();
906 TIME("erase many", BASICS) {
909 auto it1 = u.begin();
912 auto it2 = u.begin();
915 clear_icache(); auto b = readTSC();
922 //-----------------------------------------------------------------------------
923 SECTION("Large Tests");
925 TIME_N("reserved bulk push_back", BASICS) {
930 clear_icache(); auto b = readTSC();
931 for (int i = 0; i < N; ++i) u.emplace_back(t);
937 TIME_N("reserved bulk emplace", BASICS) {
941 clear_icache(); auto b = readTSC();
942 for (int i = 0; i < N; ++i) u.emplace_back(0);
948 TIME_N("populate", BASICS) {
952 clear_icache(); auto b = readTSC();
953 for (int i = 0; i < N; ++i) u.push_back(t);
959 TIME("jigsaw growth", BASICS) {
961 { 1, 5, 2, 80, 17, 8, 9, 8, 140, 130, 1000, 130, 10000, 0, 8000, 2000 };
962 clear_icache(); auto b = readTSC();
964 for (auto s : sizes) {
966 int toAdd = u.size() - s;
967 for (int i = 0; i < toAdd / 2; ++i) u.emplace_back(0);
968 u.insert(u.end(), (toAdd + 1) / 2, T(1));
970 int toRm = u.size() - s;
971 for (int i = 0; i < toRm / 2; ++i) u.pop_back();
974 if (it < u.end()) u.erase(it, u.end());
982 TIME("random access and modify", (T1)) {
983 static const int n = 1024 * 1024 * 16;
986 clear_icache(); auto b = readTSC();
988 for (int i = 0; i < 100000; ++i) {
989 j = (j * 2 + j) ^ 0xdeadbeef;
999 TIME_N("iterate", (T1)) {
1002 clear_icache(); auto b = readTSC();
1014 TIME("emplace massive", BASICS) {
1015 clear_icache(); auto b = readTSC();
1017 for (int i = 0; i < 10000000; ++i) {
1027 //=============================================================================