@@ -735,7 +735,7 @@ for options, bad phase of the moon, etc.
@c hol_set_group ok
@c hol_find_entry ok
@c hol_sort @mtslocale @acucorrupt
-@c qsort dup
+@c qsort dup @acucorrupt
@c hol_entry_qcmp @mtslocale
@c hol_entry_cmp @mtslocale
@c group_cmp ok
@@ -253,7 +253,7 @@ The symbols in this section are defined in the header file @file{locale.h}.
@c calculate_head_size ok
@c __munmap ok
@c compute_hashval ok
-@c qsort dup
+@c qsort dup @acucorrupt
@c rangecmp ok
@c malloc @ascuheap @acsmem
@c strdup @ascuheap @acsmem
@@ -159,7 +159,7 @@ To sort an array using an arbitrary comparison function, use the
@deftypefun void qsort (void *@var{array}, size_t @var{count}, size_t @var{size}, comparison_fn_t @var{compare})
@standards{ISO, stdlib.h}
-@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
+@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@acucorrupt{}}}
The @code{qsort} function sorts the array @var{array}. The array
contains @var{count} elements, each of which is of size @var{size}.
@@ -199,8 +199,9 @@ Functions}):
The @code{qsort} function derives its name from the fact that it was
originally implemented using the ``quick sort'' algorithm.
-The implementation of @code{qsort} in this library is an in-place sort
-and uses a constant extra space (allocated on the stack).
+The implementation of @code{qsort} in this library might not be an
+in-place sort and might thereby use an extra amount of memory to store
+the array.
@end deftypefun
@node Search/Sort Example
@@ -215,7 +215,6 @@ tests := \
tst-qsort \
tst-qsort2 \
tst-qsort3 \
- tst-qsort5 \
tst-qsort6 \
tst-quick_exit \
tst-rand48 \
@@ -264,6 +263,7 @@ tests := \
tests-internal := \
tst-qsort4 \
+ tst-qsort5 \
tst-strtod1i \
tst-strtod3 \
tst-strtod4 \
@@ -19,6 +19,7 @@
Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
+#include <errno.h>
#include <limits.h>
#include <memswap.h>
#include <stdlib.h>
@@ -32,9 +33,13 @@ enum swap_type_t
{
SWAP_WORDS_64,
SWAP_WORDS_32,
+ SWAP_VOID_ARG,
SWAP_BYTES
};
+typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
+typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
+
/* If this function returns true, elements can be safely copied using word
loads and stores. Otherwise, it might not be safe. BASE (as an integer)
must be a multiple of the word alignment. SIZE must be a multiple of
@@ -51,7 +56,6 @@ is_aligned (const void *base, size_t size, size_t wordsize)
static inline void
swap_words_64 (void * restrict a, void * restrict b, size_t n)
{
- typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
do
{
n -= 8;
@@ -64,7 +68,6 @@ swap_words_64 (void * restrict a, void * restrict b, size_t n)
static inline void
swap_words_32 (void * restrict a, void * restrict b, size_t n)
{
- typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
do
{
n -= 4;
@@ -88,43 +91,6 @@ do_swap (void * restrict a, void * restrict b, size_t size,
__memswap (a, b, size);
}
-/* Discontinue quicksort algorithm when partition gets below this size.
- This particular magic number was chosen to work best on a Sun 4/260. */
-#define MAX_THRESH 4
-
-/* Stack node declarations used to store unfulfilled partition obligations. */
-typedef struct
- {
- char *lo;
- char *hi;
- size_t depth;
- } stack_node;
-
-/* The stack needs log (total_elements) entries (we could even subtract
- log(MAX_THRESH)). Since total_elements has type size_t, we get as
- upper bound for log (total_elements):
- bits per byte (CHAR_BIT) * sizeof(size_t). */
-enum { STACK_SIZE = CHAR_BIT * sizeof (size_t) };
-
-static inline stack_node *
-push (stack_node *top, char *lo, char *hi, size_t depth)
-{
- top->lo = lo;
- top->hi = hi;
- top->depth = depth;
- return ++top;
-}
-
-static inline stack_node *
-pop (stack_node *top, char **lo, char **hi, size_t *depth)
-{
- --top;
- *lo = top->lo;
- *hi = top->hi;
- *depth = top->depth;
- return top;
-}
-
/* Establish the heap condition at index K, that is, the key at K will
not be less than either of its children, at 2 * K + 1 and 2 * K + 2
(if they exist). N is the last valid index. */
@@ -172,21 +138,25 @@ heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type,
}
}
-/* A non-recursive heapsort, used on introsort implementation as a
- fallback routine with worst-case performance of O(nlog n) and
- worst-case space complexity of O(1). It sorts the array starting
- at BASE and ending at END (inclusive), with each element of SIZE
- bytes. The SWAP_TYPE is the callback function used to swap
- elements, and CMP is the function used to compare elements. */
+/* A non-recursive heapsort with worst-case performance of O(nlog n) and
+ worst-case space complexity of O(1). It sorts the array starting at
+ BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback
+ function used to swap elements, and CMP is the function used to compare
+ elements. */
static void
-heapsort_r (void *base, void *end, size_t size, enum swap_type_t swap_type,
- __compar_d_fn_t cmp, void *arg)
+heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg)
{
- size_t n = ((uintptr_t) end - (uintptr_t) base) / size;
if (n <= 1)
- /* Handled by insertion sort. */
return;
+ enum swap_type_t swap_type;
+ if (is_aligned (base, size, 8))
+ swap_type = SWAP_WORDS_64;
+ else if (is_aligned (base, size, 4))
+ swap_type = SWAP_WORDS_32;
+ else
+ swap_type = SWAP_BYTES;
+
/* Build the binary heap, largest value at the base[0]. */
heapify (base, size, n, swap_type, cmp, arg);
@@ -208,226 +178,236 @@ heapsort_r (void *base, void *end, size_t size, enum swap_type_t swap_type,
}
}
-static inline void
-insertion_sort_qsort_partitions (void *const pbase, size_t total_elems,
- size_t size, enum swap_type_t swap_type,
- __compar_d_fn_t cmp, void *arg)
+/* The maximum size in bytes required by mergesort that will be provided
+ through a buffer allocated in the stack. */
+#define QSORT_STACK_SIZE 1024
+
+/* Elements larger than this value will be sorted through indirect sorting
+ to minimize the need to memory swap calls. */
+#define INDIRECT_SORT_SIZE_THRES 32
+
+struct msort_param
{
- char *base_ptr = (char *) pbase;
- char *const end_ptr = &base_ptr[size * (total_elems - 1)];
- char *tmp_ptr = base_ptr;
-#define min(x, y) ((x) < (y) ? (x) : (y))
- const size_t max_thresh = MAX_THRESH * size;
- char *thresh = min(end_ptr, base_ptr + max_thresh);
- char *run_ptr;
+ size_t s;
+ enum swap_type_t var;
+ __compar_d_fn_t cmp;
+ void *arg;
+ char *t;
+};
+
+static enum swap_type_t
+__attribute_used__
+msort_swap_type (void *const pbase, size_t size)
+{
+ if ((size & (sizeof (uint32_t) - 1)) == 0
+ && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0)
+ {
+ if (size == sizeof (uint32_t))
+ return SWAP_WORDS_32;
+ else if (size == sizeof (uint64_t)
+ && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0)
+ return SWAP_WORDS_64;
+ }
+ return SWAP_BYTES;
+}
- /* Find smallest element in first threshold and place it at the
- array's beginning. This is the smallest array element,
- and the operation speeds up insertion sort's inner loop. */
+static void
+msort_with_tmp (const struct msort_param *p, void *b, size_t n)
+{
+ char *b1, *b2;
+ size_t n1, n2;
- for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
- if (cmp (run_ptr, tmp_ptr, arg) < 0)
- tmp_ptr = run_ptr;
+ if (n <= 1)
+ return;
- if (tmp_ptr != base_ptr)
- do_swap (tmp_ptr, base_ptr, size, swap_type);
+ n1 = n / 2;
+ n2 = n - n1;
+ b1 = b;
+ b2 = (char *) b + (n1 * p->s);
- /* Insertion sort, running from left-hand-side up to right-hand-side. */
+ msort_with_tmp (p, b1, n1);
+ msort_with_tmp (p, b2, n2);
- run_ptr = base_ptr + size;
- while ((run_ptr += size) <= end_ptr)
+ char *tmp = p->t;
+ const size_t s = p->s;
+ __compar_d_fn_t cmp = p->cmp;
+ void *arg = p->arg;
+ switch (p->var)
{
- tmp_ptr = run_ptr - size;
- while (tmp_ptr != base_ptr && cmp (run_ptr, tmp_ptr, arg) < 0)
- tmp_ptr -= size;
-
- tmp_ptr += size;
- if (tmp_ptr != run_ptr)
- {
- char *trav;
-
- trav = run_ptr + size;
- while (--trav >= run_ptr)
- {
- char c = *trav;
- char *hi, *lo;
-
- for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
- *hi = *lo;
- *hi = c;
- }
- }
+ case SWAP_WORDS_32:
+ while (n1 > 0 && n2 > 0)
+ {
+ if (cmp (b1, b2, arg) <= 0)
+ {
+ *(u32_alias_t *) tmp = *(u32_alias_t *) b1;
+ b1 += sizeof (u32_alias_t);
+ --n1;
+ }
+ else
+ {
+ *(u32_alias_t *) tmp = *(u32_alias_t *) b2;
+ b2 += sizeof (u32_alias_t);
+ --n2;
+ }
+ tmp += sizeof (u32_alias_t);
+ }
+ break;
+ case SWAP_WORDS_64:
+ while (n1 > 0 && n2 > 0)
+ {
+ if (cmp (b1, b2, arg) <= 0)
+ {
+ *(u64_alias_t *) tmp = *(u64_alias_t *) b1;
+ b1 += sizeof (u64_alias_t);
+ --n1;
+ }
+ else
+ {
+ *(u64_alias_t *) tmp = *(u64_alias_t *) b2;
+ b2 += sizeof (u64_alias_t);
+ --n2;
+ }
+ tmp += sizeof (u64_alias_t);
+ }
+ break;
+ case SWAP_VOID_ARG:
+ while (n1 > 0 && n2 > 0)
+ {
+ if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0)
+ {
+ *(void **) tmp = *(void **) b1;
+ b1 += sizeof (void *);
+ --n1;
+ }
+ else
+ {
+ *(void **) tmp = *(void **) b2;
+ b2 += sizeof (void *);
+ --n2;
+ }
+ tmp += sizeof (void *);
+ }
+ default:
+ while (n1 > 0 && n2 > 0)
+ {
+ if (cmp (b1, b2, arg) <= 0)
+ {
+ tmp = (char *) __mempcpy (tmp, b1, s);
+ b1 += s;
+ --n1;
+ }
+ else
+ {
+ tmp = (char *) __mempcpy (tmp, b2, s);
+ b2 += s;
+ --n2;
+ }
+ }
+ break;
}
-}
-
-/* Order size using quicksort. This implementation incorporates
- four optimizations discussed in Sedgewick:
- 1. Non-recursive, using an explicit stack of pointer that store the
- next array partition to sort. To save time, this maximum amount
- of space required to store an array of SIZE_MAX is allocated on the
- stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
- only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
- Pretty cheap, actually.
-
- 2. Chose the pivot element using a median-of-three decision tree.
- This reduces the probability of selecting a bad pivot value and
- eliminates certain extraneous comparisons.
+ if (n1 > 0)
+ memcpy (tmp, b1, n1 * s);
+ memcpy (b, p->t, (n - n2) * s);
+}
- 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
- insertion sort to order the MAX_THRESH items within each partition.
- This is a big win, since insertion sort is faster for small, mostly
- sorted array segments.
+static void
+indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n,
+ size_t s)
+{
+ /* Indirect sorting. */
+ char *ip = (char *) b;
+ void **tp = (void **) (p->t + n * sizeof (void *));
+ void **t = tp;
+ void *tmp_storage = (void *) (tp + n);
- 4. The larger of the two sub-partitions is always pushed onto the
- stack first, with the algorithm then concentrating on the
- smaller partition. This *guarantees* no more than log (total_elems)
- stack size is needed (actually O(1) in this case)! */
+ while ((void *) t < tmp_storage)
+ {
+ *t++ = ip;
+ ip += s;
+ }
+ msort_with_tmp (p, p->t + n * sizeof (void *), n);
+
+ /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
+ the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
+ char *kp;
+ size_t i;
+ for (i = 0, ip = (char *) b; i < n; i++, ip += s)
+ if ((kp = tp[i]) != ip)
+ {
+ size_t j = i;
+ char *jp = ip;
+ memcpy (tmp_storage, ip, s);
+
+ do
+ {
+ size_t k = (kp - (char *) b) / s;
+ tp[j] = jp;
+ memcpy (jp, kp, s);
+ j = k;
+ jp = kp;
+ kp = tp[k];
+ }
+ while (kp != ip);
+
+ tp[j] = jp;
+ memcpy (jp, tmp_storage, s);
+ }
+}
void
__qsort_r (void *const pbase, size_t total_elems, size_t size,
__compar_d_fn_t cmp, void *arg)
{
- char *base_ptr = (char *) pbase;
-
- const size_t max_thresh = MAX_THRESH * size;
-
if (total_elems <= 1)
- /* Avoid lossage with unsigned arithmetic below. */
return;
- enum swap_type_t swap_type;
- if (is_aligned (pbase, size, 8))
- swap_type = SWAP_WORDS_64;
- else if (is_aligned (pbase, size, 4))
- swap_type = SWAP_WORDS_32;
- else
- swap_type = SWAP_BYTES;
+ char tmp[QSORT_STACK_SIZE];
+ size_t total_size = total_elems * size;
+ char *buf;
- /* Maximum depth before quicksort switches to heapsort. */
- size_t depth = 2 * (sizeof (size_t) * CHAR_BIT - 1
- - __builtin_clzl (total_elems));
+ if (total_size < sizeof buf)
+ buf = tmp;
+ else
+ {
+ int save = errno;
+ buf = malloc (total_size);
+ __set_errno (save);
+ if (buf == NULL)
+ {
+ /* Fallback to heapsort in case of memory failure. */
+ heapsort_r (pbase, total_elems - 1, size, cmp, arg);
+ return;
+ }
+ }
- if (total_elems > MAX_THRESH)
+ if (size > INDIRECT_SORT_SIZE_THRES)
{
- char *lo = base_ptr;
- char *hi = &lo[size * (total_elems - 1)];
- stack_node stack[STACK_SIZE];
- stack_node *top = push (stack, NULL, NULL, depth);
-
- while (stack < top)
- {
- if (depth == 0)
- {
- heapsort_r (lo, hi, size, swap_type, cmp, arg);
- top = pop (top, &lo, &hi, &depth);
- continue;
- }
-
- char *left_ptr;
- char *right_ptr;
-
- /* Select median value from among LO, MID, and HI. Rearrange
- LO and HI so the three values are sorted. This lowers the
- probability of picking a pathological pivot value and
- skips a comparison for both the LEFT_PTR and RIGHT_PTR in
- the while loops. */
-
- char *mid = lo + size * ((hi - lo) / size >> 1);
-
- if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
- do_swap (mid, lo, size, swap_type);
- if ((*cmp) ((void *) hi, (void *) mid, arg) < 0)
- do_swap (mid, hi, size, swap_type);
- else
- goto jump_over;
- if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
- do_swap (mid, lo, size, swap_type);
- jump_over:;
-
- left_ptr = lo + size;
- right_ptr = hi - size;
-
- /* Here's the famous ``collapse the walls'' section of quicksort.
- Gotta like those tight inner loops! They are the main reason
- that this algorithm runs much faster than others. */
- do
- {
- while (left_ptr != mid
- && (*cmp) ((void *) left_ptr, (void *) mid, arg) < 0)
- left_ptr += size;
-
- while (right_ptr != mid
- && (*cmp) ((void *) mid, (void *) right_ptr, arg) < 0)
- right_ptr -= size;
-
- if (left_ptr < right_ptr)
- {
- do_swap (left_ptr, right_ptr, size, swap_type);
- if (mid == left_ptr)
- mid = right_ptr;
- else if (mid == right_ptr)
- mid = left_ptr;
- left_ptr += size;
- right_ptr -= size;
- }
- else if (left_ptr == right_ptr)
- {
- left_ptr += size;
- right_ptr -= size;
- break;
- }
- }
- while (left_ptr <= right_ptr);
-
- /* Set up pointers for next iteration. First determine whether
- left and right partitions are below the threshold size. If so,
- ignore one or both. Otherwise, push the larger partition's
- bounds on the stack and continue sorting the smaller one. */
-
- if ((size_t) (right_ptr - lo) <= max_thresh)
- {
- if ((size_t) (hi - left_ptr) <= max_thresh)
- /* Ignore both small partitions. */
- {
- top = pop (top, &lo, &hi, &depth);
- --depth;
- }
- else
- {
- /* Ignore small left partition. */
- lo = left_ptr;
- --depth;
- }
- }
- else if ((size_t) (hi - left_ptr) <= max_thresh)
- /* Ignore small right partition. */
- {
- hi = right_ptr;
- --depth;
- }
- else if ((right_ptr - lo) > (hi - left_ptr))
- {
- /* Push larger left partition indices. */
- top = push (top, lo, right_ptr, depth - 1);
- lo = left_ptr;
- }
- else
- {
- /* Push larger right partition indices. */
- top = push (top, left_ptr, hi, depth - 1);
- hi = right_ptr;
- }
- }
+ const struct msort_param msort_param =
+ {
+ .s = sizeof (void *),
+ .cmp = cmp,
+ .arg = arg,
+ .var = SWAP_VOID_ARG,
+ .t = buf,
+ };
+ indirect_msort_with_tmp (&msort_param, pbase, total_elems, size);
+ }
+ else
+ {
+ const struct msort_param msort_param =
+ {
+ .s = size,
+ .cmp = cmp,
+ .arg = arg,
+ .var = msort_swap_type (pbase, size),
+ .t = buf,
+ };
+ msort_with_tmp (&msort_param, pbase, total_elems);
}
- /* Once the BASE_PTR array is partially sorted by quicksort the rest
- is completely sorted using insertion sort, since this is efficient
- for partitions below MAX_THRESH size. BASE_PTR points to the beginning
- of the array to sort, and END_PTR points at the very last element in
- the array (*not* one beyond it!). */
- insertion_sort_qsort_partitions (pbase, total_elems, size, swap_type, cmp,
- arg);
+ if (buf != tmp)
+ free (buf);
}
libc_hidden_def (__qsort_r)
weak_alias (__qsort_r, qsort_r)
@@ -30,35 +30,12 @@ cmp (const void *a1, const void *b1, void *closure)
return *a - *b;
}
-/* Wrapper around heapsort_r that set ups the required variables. */
-static void
-heapsort_wrapper (void *const pbase, size_t total_elems, size_t size,
- __compar_d_fn_t cmp, void *arg)
-{
- char *base_ptr = (char *) pbase;
- char *lo = base_ptr;
- char *hi = &lo[size * (total_elems - 1)];
-
- if (total_elems <= 1)
- /* Avoid lossage with unsigned arithmetic below. */
- return;
-
- enum swap_type_t swap_type;
- if (is_aligned (pbase, size, 8))
- swap_type = SWAP_WORDS_64;
- else if (is_aligned (pbase, size, 4))
- swap_type = SWAP_WORDS_32;
- else
- swap_type = SWAP_BYTES;
- heapsort_r (lo, hi, size, swap_type, cmp, arg);
-}
-
static void
check_one_sort (signed char *array, int length)
{
signed char *copy = xmalloc (length);
memcpy (copy, array, length);
- heapsort_wrapper (copy, length, 1, cmp, NULL);
+ heapsort_r (copy, length - 1, 1, cmp, NULL);
/* Verify that the result is sorted. */
for (int i = 1; i < length; ++i)
@@ -27,6 +27,55 @@
#include <support/check.h>
#include <support/support.h>
+#include "qsort.c"
+
+typedef void (*func_r_wrapper) (void *, size_t, size_t, __compar_d_fn_t,
+ void *);
+
+static void
+mergesort_r_wrapper (void *pbase, size_t total_elems, size_t size,
+ int (*cmp)(const void *, const void *, void *),
+ void *arg)
+{
+ size_t total_size = total_elems * size;
+ void *buf = xmalloc (total_size);
+
+ if (size > INDIRECT_SORT_SIZE_THRES)
+ {
+ const struct msort_param msort_param =
+ {
+ .s = sizeof (void *),
+ .cmp = cmp,
+ .arg = arg,
+ .var = SWAP_VOID_ARG,
+ .t = buf,
+ };
+ indirect_msort_with_tmp (&msort_param, pbase, total_elems, size);
+ }
+ else
+ {
+ const struct msort_param msort_param =
+ {
+ .s = size,
+ .cmp = cmp,
+ .arg = arg,
+ .var = msort_swap_type (pbase, size),
+ .t = buf,
+ };
+ msort_with_tmp (&msort_param, pbase, total_elems);
+ }
+
+ free (buf);
+}
+
+static void
+heapsort_r_wrapper (void *pbase, size_t total_elems, size_t size,
+ int (*cmp)(const void *, const void *, void *),
+ void *arg)
+{
+ heapsort_r (pbase, total_elems - 1, size, cmp, arg);
+}
+
struct context
{
/* Called the gas value in the paper. This value is larger than all
@@ -135,11 +184,11 @@ compare_counter (const void *l1, const void *r1, void *closure)
/* Count the comparisons required for an adversarial permutation of
length N. */
static unsigned long long int
-count_comparisons (size_t n)
+count_comparisons (func_r_wrapper func, size_t n)
{
int *array = create_permutation (n);
unsigned long long int counter = 0;
- qsort_r (array, n, sizeof (*array), compare_counter, &counter);
+ func (array, n, sizeof (*array), compare_counter, &counter);
free (array);
return counter;
}
@@ -149,13 +198,19 @@ count_comparisons (size_t n)
static void
check_one_n (size_t n)
{
- unsigned long long int count = count_comparisons (n);
- double factor = count / (n * log (count));
- printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n",
- n, count, factor);
- /* This is an arbitrary factor which is true for the current
+ unsigned long long int m_count = count_comparisons (mergesort_r_wrapper, n);
+ double m_factor = m_count / (n * log (m_count));
+
+ unsigned long long int h_count = count_comparisons (heapsort_r_wrapper, n);
+ double h_factor = h_count / (n * log (h_count));
+
+ printf ("info: length %zu: (%llu, %llu) comparisons ~ (%f, %f) * n * log (n)\n",
+ n, m_count, h_count, m_factor, h_factor);
+
+ /* This is an arbitrary factor which is true for the largest current
implementation across a wide range of sizes. */
- TEST_VERIFY (factor <= 4.5);
+ double factor = fmax (m_factor, h_factor);
+ TEST_VERIFY (factor <= 2.5);
}
static int