arm64: mm: use bit ops rather than arithmetic in pa/va translations

Since PAGE_OFFSET is chosen such that it cuts the kernel VA space right
in half, and since the size of the kernel VA space itself is always a
power of 2, we can treat PAGE_OFFSET as a bitmask and replace the
additions/subtractions with 'or' and 'and-not' operations.

For the comparison against PAGE_OFFSET, a mov/cmp/branch sequence ends
up getting replaced with a single tbz instruction. For the additions and
subtractions, we save a mov instruction since the mask is folded into the
instruction's immediate field.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 460d09b..eb79815 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -88,10 +88,10 @@
  */
 #define __virt_to_phys(x) ({						\
 	phys_addr_t __x = (phys_addr_t)(x);				\
-	__x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) :	\
-			     (__x - kimage_voffset); })
+	__x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET :	\
+				 (__x - kimage_voffset); })
 
-#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
 #define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
 
 /*
@@ -132,6 +132,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/bitops.h>
 #include <linux/mmdebug.h>
 
 extern phys_addr_t		memstart_addr;