1 /* $OpenBSD: qsort.c,v 1.18 2017/05/30 14:54:09 millert Exp $ */
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 /* $OpenBSD: heapsort.c,v 1.11 2017/05/20 12:48:56 millert Exp $ */
33 * Copyright (c) 1991, 1993
34 * The Regents of the University of California. All rights reserved.
36 * This code is derived from software contributed to Berkeley by
37 * Ronnie Kon at Mindcraft Inc., Kevin Lew and Elmer Yglesias.
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 #include <sys/types.h>
72 * Swap two areas of size number of bytes. Although qsort(3) permits random
73 * blocks of memory to be sorted, sorting pointers is almost certainly the
74 * common case (and, were it not, could easily be made so). Regardless, it
75 * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
76 * arithmetic gets lost in the time required for comparison function calls.
78 #define SWAP(a, b, count, size, tmp) { \
87 /* Copy one block of size size to another. */
88 #define COPY(a, b, count, size, tmp1, tmp2) { \
98 * Build the list into a heap, where a heap is defined such that for
99 * the records K1 ... KN, Kj/2 >= Kj for 1 <= j/2 <= j <= N.
101 * There are two cases. If j == nmemb, select largest of Ki and Kj. If
102 * j < nmemb, select largest of Ki, Kj and Kj+1.
104 #define CREATE(initval, nmemb, par_i, child_i, par, child, size, count, tmp) { \
105 for (par_i = initval; (child_i = par_i * 2) <= nmemb; \
107 child = base + child_i * size; \
108 if (child_i < nmemb && compar(child, child + size) < 0) { \
112 par = base + par_i * size; \
113 if (compar(child, par) <= 0) \
115 SWAP(par, child, count, size, tmp); \
120 * Select the top of the heap and 'heapify'. Since by far the most expensive
121 * action is the call to the compar function, a considerable optimization
122 * in the average case can be achieved due to the fact that k, the displaced
123 * element, is usually quite small, so it would be preferable to first
124 * heapify, always maintaining the invariant that the larger child is copied
125 * over its parent's record.
127 * Then, starting from the *bottom* of the heap, finding k's correct place,
128 * again maintaining the invariant. As a result of the invariant no element
129 * is 'lost' when k is assigned its correct place in the heap.
131 * The time savings from this optimization are on the order of 15-20% for the
132 * average case. See Knuth, Vol. 3, page 158, problem 18.
134 * XXX Don't break the #define SELECT line, below. Reiser cpp gets upset.
136 #define SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2) { \
137 for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) { \
138 child = base + child_i * size; \
139 if (child_i < nmemb && compar(child, child + size) < 0) { \
143 par = base + par_i * size; \
144 COPY(par, child, count, size, tmp1, tmp2); \
148 par_i = child_i / 2; \
149 child = base + child_i * size; \
150 par = base + par_i * size; \
151 if (child_i == 1 || compar(k, par) < 0) { \
152 COPY(child, k, count, size, tmp1, tmp2); \
155 COPY(child, par, count, size, tmp1, tmp2); \
160 * Heapsort -- Knuth, Vol. 3, page 145. Runs in O (N lg N), both average
161 * and worst. While heapsort is faster than the worst case of quicksort,
162 * the BSD quicksort does median selection so that the chance of finding
163 * a data set that will trigger the worst case is nonexistent. Heapsort's
164 * only advantage over quicksort is that it requires little additional memory.
167 bsdheapsort(void *vbase, size_t nmemb, size_t size,
168 int (*compar)(const void *, const void *))
171 char tmp, *tmp1, *tmp2;
172 char *base, *k, *p, *t;
182 if ((k = (char *) ourmalloc(size)) == NULL)
186 * Items are numbered from 1 to nmemb, so offset from size bytes
187 * below the starting address.
189 base = (char *)vbase - size;
191 for (l = nmemb / 2 + 1; --l; )
192 CREATE(l, nmemb, i, j, t, p, size, cnt, tmp);
195 * For each element of the heap, save the largest element into its
196 * final slot, save the displaced element (k), then recreate the
200 COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2);
201 COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2);
203 SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);
210 static __inline char *med3(char *, char *, char *, int (*)(const void *, const void *));
211 static __inline void swapfunc(char *, char *, size_t, int);
213 #define min(a, b) (a) < (b) ? a : b
216 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
218 * This version differs from Bentley & McIlroy in the following ways:
219 * 1. The partition value is swapped into a[0] instead of being
220 * stored out of line.
222 * 2. The swap function can swap 32-bit aligned elements on 64-bit
223 * platforms instead of swapping them as byte-aligned.
225 * 3. It uses David Musser's introsort algorithm to fall back to
226 * heapsort(3) when the recursion depth reaches 2*lg(n + 1).
227 * This avoids quicksort's quadratic behavior for pathological
228 * input without appreciably changing the average run time.
230 * 4. Tail recursion is eliminated when sorting the larger of two
231 * subpartitions to save stack space.
233 #define SWAPTYPE_BYTEV 1
234 #define SWAPTYPE_INTV 2
235 #define SWAPTYPE_LONGV 3
236 #define SWAPTYPE_INT 4
237 #define SWAPTYPE_LONG 5
239 #define TYPE_ALIGNED(TYPE, a, es) \
240 (((char *)a - (char *)0) % sizeof(TYPE) == 0 && es % sizeof(TYPE) == 0)
242 #define swapcode(TYPE, parmi, parmj, n) { \
243 size_t i = (n) / sizeof (TYPE); \
244 TYPE *pi = (TYPE *) (parmi); \
245 TYPE *pj = (TYPE *) (parmj); \
254 swapfunc(char *a, char *b, size_t n, int swaptype)
259 swapcode(int, a, b, n);
263 swapcode(long, a, b, n);
266 swapcode(char, a, b, n);
271 #define swap(a, b) do { \
272 switch (swaptype) { \
273 case SWAPTYPE_INT: { \
274 int t = *(int *)(a); \
275 *(int *)(a) = *(int *)(b); \
279 case SWAPTYPE_LONG: { \
280 long t = *(long *)(a); \
281 *(long *)(a) = *(long *)(b); \
286 swapfunc(a, b, es, swaptype); \
290 #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype)
292 static __inline char *
293 med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
295 return cmp(a, b) < 0 ?
296 (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a))
297 : (cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c));
301 introsort(char *a, size_t n, size_t es, size_t maxdepth, int swaptype,
302 int (*cmp)(const void *, const void *))
304 char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
309 for (pm = a + es; pm < a + n * es; pm += es)
310 for (pl = pm; pl > a && cmp(pl - es, pl) > 0;
316 if (bsdheapsort(a, n, es, cmp) == 0)
320 pm = a + (n / 2) * es;
323 pn = a + (n - 1) * es;
326 pl = med3(pl, pl + s, pl + 2 * s, cmp);
327 pm = med3(pm - s, pm, pm + s, cmp);
328 pn = med3(pn - 2 * s, pn - s, pn, cmp);
330 pm = med3(pl, pm, pn, cmp);
334 pc = pd = a + (n - 1) * es;
336 while (pb <= pc && (cmp_result = cmp(pb, a)) <= 0) {
337 if (cmp_result == 0) {
343 while (pb <= pc && (cmp_result = cmp(pc, a)) >= 0) {
344 if (cmp_result == 0) {
358 r = min(pa - a, pb - pa);
359 vecswap(a, pb - r, r);
360 r = min(((uintptr_t)(pd - pc)), ((uintptr_t)(pn - pd - es)));
361 vecswap(pb, pn - r, r);
363 * To save stack space we sort the smaller side of the partition first
364 * using recursion and eliminate tail recursion for the larger side.
369 /* Recurse for 1st side, iterate for 2nd side. */
372 introsort(a, r / es, es, maxdepth,
380 /* Recurse for 2nd side, iterate for 1st side. */
383 introsort(pn - s, s / es, es, maxdepth,
393 bsdqsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *))
395 size_t i, maxdepth = 0;
398 /* Approximate 2*ceil(lg(n + 1)) */
399 for (i = n; i > 0; i >>= 1)
403 if (TYPE_ALIGNED(long, a, es))
404 swaptype = es == sizeof(long) ? SWAPTYPE_LONG : SWAPTYPE_LONGV;
405 else if (sizeof(int) != sizeof(long) && TYPE_ALIGNED(int, a, es))
406 swaptype = es == sizeof(int) ? SWAPTYPE_INT : SWAPTYPE_INTV;
408 swaptype = SWAPTYPE_BYTEV;
410 introsort((char *) a, n, es, maxdepth, swaptype, cmp);