fsnotify: Move mark list head from object into dedicated structure

Currently notification marks are attached to object (inode or vfsmnt) by
a hlist_head in the object. The list is also protected by a spinlock in
the object. So while there is any mark attached to the list of marks,
the object must be pinned in memory (and thus e.g. last iput() deleting
inode cannot happen). Also for list iteration in fsnotify() to work, we
must hold fsnotify_mark_srcu lock so that mark itself and
mark->obj_list.next cannot get freed. Thus we are required to wait for
response to fanotify events from userspace process with
fsnotify_mark_srcu lock held. That causes issues when userspace process
is buggy and does not reply to some event - basically the whole
notification subsystem gets eventually stuck.

So to be able to drop fsnotify_mark_srcu lock while waiting for
response, we have to pin the mark in memory and make sure it stays in
the object list (as removing the mark waiting for response could lead to
lost notification events for groups later in the list). However we don't
want inode reclaim to block on such mark as that would lead to system
just locking up elsewhere.

This commit is the first in the series that paves way towards solving
these conflicting lifetime needs. Instead of anchoring the list of marks
directly in the object, we anchor it in a dedicated structure
(fsnotify_mark_connector) and just point to that structure from the
object. The following commits will also add spinlock protecting the list
and object pointer to the structure.

Reviewed-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 44836e5..24b6191 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -83,6 +83,8 @@
 #define FSNOTIFY_REAPER_DELAY	(1)	/* 1 jiffy */
 
 struct srcu_struct fsnotify_mark_srcu;
+struct kmem_cache *fsnotify_mark_connector_cachep;
+
 static DEFINE_SPINLOCK(destroy_lock);
 static LIST_HEAD(destroy_list);
 
@@ -104,12 +106,15 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 }
 
 /* Calculate mask of events for a list of marks */
-u32 fsnotify_recalc_mask(struct hlist_head *head)
+u32 fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
 	u32 new_mask = 0;
 	struct fsnotify_mark *mark;
 
-	hlist_for_each_entry(mark, head, obj_list)
+	if (!conn)
+		return 0;
+
+	hlist_for_each_entry(mark, &conn->list, obj_list)
 		new_mask |= mark->mask;
 	return new_mask;
 }
@@ -220,10 +225,14 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 	fsnotify_free_mark(mark);
 }
 
-void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
+void fsnotify_destroy_marks(struct fsnotify_mark_connector *conn,
+			    spinlock_t *lock)
 {
 	struct fsnotify_mark *mark;
 
+	if (!conn)
+		return;
+
 	while (1) {
 		/*
 		 * We have to be careful since we can race with e.g.
@@ -233,11 +242,12 @@ void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
 		 * calling fsnotify_destroy_mark() more than once is fine.
 		 */
 		spin_lock(lock);
-		if (hlist_empty(head)) {
+		if (hlist_empty(&conn->list)) {
 			spin_unlock(lock);
 			break;
 		}
-		mark = hlist_entry(head->first, struct fsnotify_mark, obj_list);
+		mark = hlist_entry(conn->list.first, struct fsnotify_mark,
+				   obj_list);
 		/*
 		 * We don't update i_fsnotify_mask / mnt_fsnotify_mask here
 		 * since inode / mount is going away anyway. So just remove
@@ -251,6 +261,14 @@ void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
 	}
 }
 
+void fsnotify_connector_free(struct fsnotify_mark_connector **connp)
+{
+	if (*connp) {
+		kmem_cache_free(fsnotify_mark_connector_cachep, *connp);
+		*connp = NULL;
+	}
+}
+
 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
 {
 	assert_spin_locked(&mark->lock);
@@ -304,21 +322,54 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 	return -1;
 }
 
-/* Add mark into proper place in given list of marks */
-int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark,
-			   int allow_dups)
+static int fsnotify_attach_connector_to_object(
+					struct fsnotify_mark_connector **connp)
+{
+	struct fsnotify_mark_connector *conn;
+
+	conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_ATOMIC);
+	if (!conn)
+		return -ENOMEM;
+	INIT_HLIST_HEAD(&conn->list);
+	/*
+	 * Make sure 'conn' initialization is visible. Matches
+	 * lockless_dereference() in fsnotify().
+	 */
+	smp_wmb();
+	*connp = conn;
+
+	return 0;
+}
+
+/*
+ * Add mark into proper place in given list of marks. These marks may be used
+ * for the fsnotify backend to determine which event types should be delivered
+ * to which group and for which inodes. These marks are ordered according to
+ * priority, highest number first, and then by the group's location in memory.
+ */
+int fsnotify_add_mark_list(struct fsnotify_mark_connector **connp,
+			   struct fsnotify_mark *mark, int allow_dups)
 {
 	struct fsnotify_mark *lmark, *last = NULL;
+	struct fsnotify_mark_connector *conn;
 	int cmp;
+	int err;
+
+	if (!*connp) {
+		err = fsnotify_attach_connector_to_object(connp);
+		if (err)
+			return err;
+	}
+	conn = *connp;
 
 	/* is mark the first mark? */
-	if (hlist_empty(head)) {
-		hlist_add_head_rcu(&mark->obj_list, head);
+	if (hlist_empty(&conn->list)) {
+		hlist_add_head_rcu(&mark->obj_list, &conn->list);
 		return 0;
 	}
 
 	/* should mark be in the middle of the current list? */
-	hlist_for_each_entry(lmark, head, obj_list) {
+	hlist_for_each_entry(lmark, &conn->list, obj_list) {
 		last = lmark;
 
 		if ((lmark->group == mark->group) && !allow_dups)
@@ -419,12 +470,15 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
  * Given a list of marks, find the mark associated with given group. If found
  * take a reference to that mark and return it, else return NULL.
  */
-struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head,
+struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_mark_connector *conn,
 					 struct fsnotify_group *group)
 {
 	struct fsnotify_mark *mark;
 
-	hlist_for_each_entry(mark, head, obj_list) {
+	if (!conn)
+		return NULL;
+
+	hlist_for_each_entry(mark, &conn->list, obj_list) {
 		if (mark->group == group) {
 			fsnotify_get_mark(mark);
 			return mark;