// SPDX-License-Identifier: GPL-2.0
/*
 * A fast, small, non-recursive O(n log n) sort for the Linux kernel
 *
 * This performs n*log2(n) + 0.37*n + o(n) comparisons on average,
 * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case.
 *
 * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n
 * better) at the expense of stack usage and much larger code to avoid
 * quicksort's O(n^2) worst case.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/export.h>
#include <linux/sort.h>

/**
 * is_aligned - is this pointer & size okay for word-wide copying?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true if elements can be copied using word loads and stores.
 * The size must be a multiple of the alignment, and the base address
 * must also be aligned if we do not have
 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
 *
 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
 * to "if ((a | b) & mask)", so we do that by hand.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
	unsigned char lsbits = (unsigned char)size;

	(void)base;
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	lsbits |= (unsigned char)(uintptr_t)base;
#endif
	return (lsbits & (align - 1)) == 0;
}
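
/*
 * Worked example (illustrative note, not from the original source):
 * for size == 12 and align == 4, lsbits == 12 and (12 & 3) == 0, so
 * 4-byte word swaps are usable; for align == 8, (12 & 7) == 4, so the
 * caller must fall back to a smaller copy granularity.
 */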

/**
 * swap_words_32 - swap two elements in 32-bit chunks
 * @a, @b: pointers to the elements
 * @size: element size (must be a multiple of 4)
 *
 * Exchange the two objects in memory. This exploits base+index addressing,
 * which basically all CPUs have, to minimize loop overhead computations.
 *
 * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
 * bottom of the loop, even though the zero flag is still valid from the
 * subtract (since the intervening mov instructions don't alter the flags).
 * Gcc 8.1.0 doesn't have that problem.
 */
static void swap_words_32(void *a, void *b, int size)
{
	size_t n = (unsigned int)size;

	do {
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
	} while (n);
}

/**
 * swap_words_64 - swap two elements in 64-bit chunks
 * @a, @b: pointers to the elements
 * @size: element size (must be a multiple of 8)
 *
 * Exchange the two objects in memory. This exploits base+index
 * addressing, which basically all CPUs have, to minimize loop overhead
 * computations.
 *
 * We'd like to use 64-bit loads if possible. If they're not available,
 * emulating one requires base+index+4 addressing which x86 has but most
 * other processors do not. If CONFIG_64BIT, we definitely have 64-bit
 * loads, but it's possible to have 64-bit loads without 64-bit pointers
 * (e.g. x32 ABI). Are there any cases the kernel needs to worry about?
 */
static void swap_words_64(void *a, void *b, int size)
{
	size_t n = (unsigned int)size;

	do {
#ifdef CONFIG_64BIT
		u64 t = *(u64 *)(a + (n -= 8));
		*(u64 *)(a + n) = *(u64 *)(b + n);
		*(u64 *)(b + n) = t;
#else
		/* Use two 32-bit transfers to avoid base+index+4 addressing */
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;

		t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
#endif
	} while (n);
}

/**
 * swap_bytes - swap two elements a byte at a time
 * @a, @b: pointers to the elements
 * @size: element size
 *
 * This is the fallback if alignment doesn't allow using larger chunks.
 */
static void swap_bytes(void *a, void *b, int size)
{
	size_t n = (unsigned int)size;

	do {
		char t = ((char *)a)[--n];
		((char *)a)[n] = ((char *)b)[n];
		((char *)b)[n] = t;
	} while (n);
}
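
/*
 * Selection example (illustrative note, not from the original source):
 * for size == 6, is_aligned(base, 6, 8) and is_aligned(base, 6, 4) both
 * fail (6 & 7 == 6, 6 & 3 == 2), so sort() below falls back to
 * swap_bytes() regardless of how well @base itself is aligned.
 */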

/**
 * parent - given the offset of the child, find the offset of the parent.
 * @i: the offset of the heap element whose parent is sought. Non-zero.
 * @lsbit: a precomputed 1-bit mask, equal to "size & -size"
 * @size: size of each element
 *
 * In terms of array indexes, the parent of element j = @i/@size is simply
 * (j-1)/2. But when working in byte offsets, we can't use implicit
 * truncation of integer divides.
 *
 * Fortunately, we only need one bit of the quotient, not the full divide.
 * @size has a least significant bit. That bit will be clear if @i is
 * an even multiple of @size, and set if it's an odd multiple.
 *
 * Logically, we're doing "if (i & lsbit) i -= size;", but since the
 * branch is unpredictable, it's done with a bit of clever branch-free
 * code instead.
 */
__attribute_const__ __always_inline
static size_t parent(size_t i, unsigned int lsbit, size_t size)
{
	i -= size;
	i -= size & -(i & lsbit);
	return i / 2;
}
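
/*
 * Worked example (illustrative note, not from the original source):
 * with size == 12, lsbit == 4. For the child at offset 48 (index 4),
 * i -= size gives 36 == 3*12, an odd multiple, so (36 & 4) is nonzero
 * and size is subtracted once more: (36 - 12)/2 == 12, i.e. index 1,
 * matching (4 - 1)/2 == 1 in array-index terms.
 */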

/**
 * sort - sort an array of elements
 * @base: pointer to data to sort
 * @num: number of elements
 * @size: size of each element
 * @cmp_func: pointer to comparison function
 * @swap_func: pointer to swap function or NULL
 *
 * This function does a heapsort on the given array. You may provide
 * a swap_func function if you need to do something more than a memory
 * copy (e.g. fix up pointers or auxiliary data), but the built-in swap
 * isn't usually a bottleneck.
 *
 * Sorting time is O(n log n) both on average and worst-case. While
 * quicksort is slightly faster on average, it suffers from exploitable
 * O(n*n) worst-case behavior and extra memory requirements that make
 * it less suitable for kernel use.
 */
void sort(void *base, size_t num, size_t size,
	  int (*cmp_func)(const void *, const void *),
	  void (*swap_func)(void *, void *, int size))
{
	/* pre-scale counters for performance */
	size_t n = num * size, a = (num/2) * size;
	const unsigned int lsbit = size & -size;  /* Used to find parent */

	if (!a)		/* num < 2 || size == 0 */
		return;

	if (!swap_func) {
		if (is_aligned(base, size, 8))
			swap_func = swap_words_64;
		else if (is_aligned(base, size, 4))
			swap_func = swap_words_32;
		else
			swap_func = swap_bytes;
	}

	/*
	 * Loop invariants:
	 * 1. elements [a,n) satisfy the heap property (compare greater than
	 *    all of their children),
	 * 2. elements [n,num*size) are sorted, and
	 * 3. a <= b <= c <= d <= n (whenever they are valid).
	 */
	for (;;) {
		size_t b, c, d;

		if (a)			/* Building heap: sift down --a */
			a -= size;
		else if (n -= size)	/* Sorting: Extract root to --n */
			swap_func(base, base + n, size);
		else			/* Sort complete */
			break;

		/*
		 * Sift element at "a" down into heap. This is the
		 * "bottom-up" variant, which significantly reduces
		 * calls to cmp_func(): we find the sift-down path all
		 * the way to the leaves (one compare per level), then
		 * backtrack to find where to insert the target element.
		 *
		 * Because elements tend to sift down close to the leaves,
		 * this uses fewer compares than doing two per level
		 * on the way down. (A bit more than half as many on
		 * average, 3/4 worst-case.)
		 */
		for (b = a; c = 2*b + size, (d = c + size) < n;)
			b = cmp_func(base + c, base + d) >= 0 ? c : d;
		if (d == n)	/* Special case last leaf with no sibling */
			b = c;

		/* Now backtrack from "b" to the correct location for "a" */
		while (b != a && cmp_func(base + a, base + b) >= 0)
			b = parent(b, lsbit, size);
		c = b;			/* Where "a" belongs */
		while (b != a) {	/* Shift it into place */
			b = parent(b, lsbit, size);
			swap_func(base + b, base + c, size);
		}
	}
}
EXPORT_SYMBOL(sort);
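
/*
 * Usage sketch (illustrative, not part of the original file; the
 * cmp_ints() helper is hypothetical):
 *
 *	static int cmp_ints(const void *a, const void *b)
 *	{
 *		int x = *(const int *)a, y = *(const int *)b;
 *
 *		return (x > y) - (x < y);  (avoids overflow of x - y)
 *	}
 *
 *	int vals[] = { 3, 1, 4, 1, 5 };
 *	sort(vals, ARRAY_SIZE(vals), sizeof(vals[0]), cmp_ints, NULL);
 *
 * Passing NULL for swap_func lets sort() pick a built-in swap; here
 * sizeof(int) == 4 and the array is naturally aligned, so it would
 * choose swap_words_32().
 */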