[PATCH] i386: pte xchg optimization
In situations where page table updates need only be made locally, and there is
no cross-processor A/D bit races involved, we need not use the heavyweight
xchg instruction to atomically fetch and clear page table entries. Instead,
we can just read and clear them directly.
This introduces a neat optimization for non-SMP kernels; drop the atomic xchg
operations from page table updates.
Thanks to Michel Lespinasse for noting this potential optimization.
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@suse.de>
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index 664bfee..45b0241 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -139,6 +139,17 @@
#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
pmd_index(address))
+/* local pte updates need not use xchg for locking */
+static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
+{
+ pte_t res;
+
+ res = *ptep;
+ native_pte_clear(NULL, 0, ptep);
+ return res;
+}
+
+#ifdef CONFIG_SMP
static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
pte_t res;
@@ -150,6 +161,9 @@
return res;
}
+#else
+#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
+#endif
#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t a, pte_t b)