div64.h: optimize do_div() for power-of-two constant divisors

Let's perform the obvious mask and shift operation in this case.

On 32-bit targets, gcc is able to do the same thing with a constant
divisor that happens to be a power of two, i.e. it turns the division
into an inline shift, but it doesn't hurt to be explicit.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index 8f4e319..5d97468 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -32,6 +32,8 @@
 
 #elif BITS_PER_LONG == 32
 
+#include <linux/log2.h>
+
 extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
 
 /* The unnecessary pointer compare is there
@@ -41,7 +43,11 @@
 	uint32_t __base = (base);			\
 	uint32_t __rem;					\
 	(void)(((typeof((n)) *)0) == ((uint64_t *)0));	\
-	if (likely(((n) >> 32) == 0)) {			\
+	if (__builtin_constant_p(__base) &&		\
+	    is_power_of_2(__base)) {			\
+		__rem = (n) & (__base - 1);		\
+		(n) >>= ilog2(__base);			\
+	} else if (likely(((n) >> 32) == 0)) {		\
 		__rem = (uint32_t)(n) % __base;		\
 		(n) = (uint32_t)(n) / __base;		\
 	} else 						\