Blame - include/linux/reciprocal_div.h - SHIFTPHONES/kernel/common

blob: 585ce89c0f336d0d5ab6895be6ccf9fe1b0a1c02 [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	/* SPDX-License-Identifier: GPL-2.0 */
Eric Dumazet	6a2d7a9	2006-12-13 00:34:27 -0800	[diff] [blame]	2	#ifndef _LINUX_RECIPROCAL_DIV_H
				3	#define _LINUX_RECIPROCAL_DIV_H
				4
				5	#include <linux/types.h>
				6
				7	/*
Hannes Frederic Sowa	809fa97	2014-01-22 02:29:41 +0100	[diff] [blame]	8	* This algorithm is based on the paper "Division by Invariant
				9	* Integers Using Multiplication" by Torbjörn Granlund and Peter
				10	* L. Montgomery.
Eric Dumazet	6a2d7a9	2006-12-13 00:34:27 -0800	[diff] [blame]	11	*
Hannes Frederic Sowa	809fa97	2014-01-22 02:29:41 +0100	[diff] [blame]	12	* The assembler implementation from Agner Fog, which this code is
				13	* based on, can be found here:
				14	* http://www.agner.org/optimize/asmlib.zip
Eric Dumazet	6a2d7a9	2006-12-13 00:34:27 -0800	[diff] [blame]	15	*
Hannes Frederic Sowa	809fa97	2014-01-22 02:29:41 +0100	[diff] [blame]	16	* This optimization for A/B is helpful if the divisor B is mostly
				17	* runtime invariant. The reciprocal of B is calculated in the
				18	* slow-path with reciprocal_value(). The fast-path can then just use
				19	* a much faster multiplication operation with a variable dividend A
				20	* to calculate the division A/B.
Eric Dumazet	6a2d7a9	2006-12-13 00:34:27 -0800	[diff] [blame]	21	*/
				22
Hannes Frederic Sowa	809fa97	2014-01-22 02:29:41 +0100	[diff] [blame]	23	struct reciprocal_value {
				24	u32 m;
				25	u8 sh1, sh2;
				26	};
Eric Dumazet	6a2d7a9	2006-12-13 00:34:27 -0800	[diff] [blame]	27
Jiong Wang	06ae482	2018-07-06 15:13:18 -0700	[diff] [blame]	28	/* "reciprocal_value" and "reciprocal_divide" together implement the basic
				29	* version of the algorithm described in Figure 4.1 of the paper.
				30	*/
Hannes Frederic Sowa	809fa97	2014-01-22 02:29:41 +0100	[diff] [blame]	31	struct reciprocal_value reciprocal_value(u32 d);
Eric Dumazet	6a2d7a9	2006-12-13 00:34:27 -0800	[diff] [blame]	32
Hannes Frederic Sowa	809fa97	2014-01-22 02:29:41 +0100	[diff] [blame]	33	static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
Eric Dumazet	6a2d7a9	2006-12-13 00:34:27 -0800	[diff] [blame]	34	{
Hannes Frederic Sowa	809fa97	2014-01-22 02:29:41 +0100	[diff] [blame]	35	u32 t = (u32)(((u64)a * R.m) >> 32);
				36	return (t + ((a - t) >> R.sh1)) >> R.sh2;
Eric Dumazet	6a2d7a9	2006-12-13 00:34:27 -0800	[diff] [blame]	37	}
Hannes Frederic Sowa	809fa97	2014-01-22 02:29:41 +0100	[diff] [blame]	38
Jiong Wang	06ae482	2018-07-06 15:13:18 -0700	[diff] [blame]	39	struct reciprocal_value_adv {
				40	u32 m;
				41	u8 sh, exp;
				42	bool is_wide_m;
				43	};
				44
				45	/* "reciprocal_value_adv" implements the advanced version of the algorithm
				46	* described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose
				47	* ceil(log2(d)) result will be 32 which then requires u128 divide on host. The
				48	* exception case could be easily handled before calling "reciprocal_value_adv".
				49	*
				50	* The advanced version requires more complex calculation to get the reciprocal
				51	* multiplier and other control variables, but then could reduce the required
				52	* emulation operations.
				53	*
				54	* It makes no sense to use this advanced version for host divide emulation:
				55	* the extra complexity of calculating the multiplier etc. could completely
				56	* cancel out the savings on emulation operations.
				57	*
				58	* However, it makes sense to use it for JIT divide code generation, for which
				59	* we are willing to trade host performance for that of the JITed code. As shown
				60	* by the following pseudo code, the required emulation operations could go down
				61	* from 6 (the basic version) to 3 or 4.
				62	*
				63	* To use the result of "reciprocal_value_adv", suppose we want to calculate
				64	* n/d, the pseudo C code will be:
				65	*
				66	* struct reciprocal_value_adv rvalue;
				67	* u8 pre_shift, exp;
				68	*
				69	* // handle exception case.
				70	* if (d >= (1U << 31)) {
				71	* result = n >= d;
				72	* return;
				73	* }
				74	*
				75	* rvalue = reciprocal_value_adv(d, 32)
				76	* exp = rvalue.exp;
				77	* if (rvalue.is_wide_m && !(d & 1)) {
				78	* // floor(log2(d & (2^32 -d)))
				79	* pre_shift = fls(d & -d) - 1;
				80	* rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
				81	* } else {
				82	* pre_shift = 0;
				83	* }
				84	*
				85	* // code generation starts.
				86	* if (d == 1U << exp) {
				87	* result = n >> exp;
				88	* } else if (rvalue.is_wide_m) {
				89	* // pre_shift must be zero when reached here.
				90	* t = ((u64)n * rvalue.m) >> 32;
				91	* result = n - t;
				92	* result >>= 1;
				93	* result += t;
				94	* result >>= rvalue.sh - 1;
				95	* } else {
				96	* if (pre_shift)
				97	* result = n >> pre_shift;
				98	* result = ((u64)result * rvalue.m) >> 32;
				99	* result >>= rvalue.sh;
				100	* }
				101	*/
				102	struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
				103
Hannes Frederic Sowa	809fa97	2014-01-22 02:29:41 +0100	[diff] [blame]	104	#endif /* _LINUX_RECIPROCAL_DIV_H */