lib: radix-tree: check accounting of existing slot replacement users
The bug in khugepaged fixed earlier in this series shows that radix tree
slot replacement is fragile; and it will become more so when not only
NULL<->!NULL transitions need to be caught but transitions from and to
exceptional entries as well. We need checks.
Re-implement radix_tree_replace_slot() on top of the sanity-checked
__radix_tree_replace(). This requires existing callers to also pass the
radix tree root, but it'll warn us when somebody replaces slots with
contents that need proper accounting (transitions between NULL entries,
real entries, exceptional entries) and where a replacement through the
slot pointer would corrupt the radix tree node counts.
Link: http://lkml.kernel.org/r/20161117193021.GB23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7885796..f91d5b0 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -753,6 +753,28 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
}
EXPORT_SYMBOL(radix_tree_lookup);
+static void replace_slot(struct radix_tree_root *root,
+ struct radix_tree_node *node,
+ void **slot, void *item,
+ bool warn_typeswitch)
+{
+ void *old = rcu_dereference_raw(*slot);
+ int exceptional;
+
+ WARN_ON_ONCE(radix_tree_is_internal_node(item));
+ WARN_ON_ONCE(!!item - !!old);
+
+ exceptional = !!radix_tree_exceptional_entry(item) -
+ !!radix_tree_exceptional_entry(old);
+
+ WARN_ON_ONCE(warn_typeswitch && exceptional);
+
+ if (node)
+ node->exceptional += exceptional;
+
+ rcu_assign_pointer(*slot, item);
+}
+
/**
* __radix_tree_replace - replace item in a slot
* @root: radix tree root
@@ -767,21 +789,34 @@ void __radix_tree_replace(struct radix_tree_root *root,
struct radix_tree_node *node,
void **slot, void *item)
{
- void *old = rcu_dereference_raw(*slot);
- int exceptional;
+ /*
+ * This function supports replacing exceptional entries, but
+ * that needs accounting against the node unless the slot is
+ * root->rnode.
+ */
+ replace_slot(root, node, slot, item,
+ !node && slot != (void **)&root->rnode);
+}
- WARN_ON_ONCE(radix_tree_is_internal_node(item));
- WARN_ON_ONCE(!!item - !!old);
-
- exceptional = !!radix_tree_exceptional_entry(item) -
- !!radix_tree_exceptional_entry(old);
-
- WARN_ON_ONCE(exceptional && !node && slot != (void **)&root->rnode);
-
- if (node)
- node->exceptional += exceptional;
-
- rcu_assign_pointer(*slot, item);
+/**
+ * radix_tree_replace_slot - replace item in a slot
+ * @root: radix tree root
+ * @slot: pointer to slot
+ * @item: new item to store in the slot.
+ *
+ * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
+ * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
+ * across slot lookup and replacement.
+ *
+ * NOTE: This cannot be used to switch between non-entries (empty slots),
+ * regular entries, and exceptional entries, as that requires accounting
+ * inside the radix tree node. When switching from one type of entry to
+ * another, use __radix_tree_lookup() and __radix_tree_replace().
+ */
+void radix_tree_replace_slot(struct radix_tree_root *root,
+ void **slot, void *item)
+{
+ replace_slot(root, NULL, slot, item, true);
}
/**