mempolicy: use struct mempolicy pointer in shmem_sb_info

This patch replaces the mempolicy mode, mode_flags, and nodemask in the
shmem_sb_info struct with a struct mempolicy pointer, initialized to NULL.
This removes dependency on the details of mempolicy from shmem.c and hugetlbfs
inode.c and simplifies the interfaces.

mpol_parse_str() in mempolicy.c is changed to return, via a pointer to a
pointer arg, a struct mempolicy pointer on success.  For MPOL_DEFAULT, the
returned pointer is NULL.  Further, mpol_parse_str() now takes a 'no_context'
argument that causes the input nodemask to be stored in the w.user_nodemask of
the created mempolicy for use when the mempolicy is installed in a tmpfs inode
shared policy tree.  At that time, any cpuset contextualization is applied to
the original input nodemask.  This preserves the previous behavior where the
input nodemask was stored in the superblock.  We can think of the returned
mempolicy as "context free".

Because mpol_parse_str() is now calling mpol_new(), we can remove from
mpol_to_str() the semantic checks that mpol_new() already performs.

Add 'no_context' parameter to mpol_to_str() to specify that it should format
the nodemask in w.user_nodemask for 'bind' and 'interleave' policies.

Change mpol_shared_policy_init() to take a pointer to a "context free" struct
mempolicy and to create a new, "contextualized" mempolicy using the mode,
mode_flags and user_nodemask from the input mempolicy.

  Note: we know that the mempolicy passed to mpol_to_str() or
  mpol_shared_policy_init() from a tmpfs superblock is "context free".  This
  is currently the only instance thereof.  However, if we found more uses for
  this concept, and introduced any ambiguity as to whether a mempolicy was
  context free or not, we could add another internal mode flag to identify
  context free mempolicies.  Then, we could remove the 'no_context' argument
  from mpol_to_str().

Added shmem_get_sbmpol() to return a reference counted superblock mempolicy,
if one exists, to pass to mpol_shared_policy_init().  We must add the
reference under the sb stat_lock to prevent races with replacement of the mpol
by remount.  This reference is removed in mpol_shared_policy_init().

[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: another build fix]
[akpm@linux-foundation.org: yet another build fix]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/shmem.c b/mm/shmem.c
index 3c620dc..e6d9298 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1079,23 +1079,29 @@
 
 #ifdef CONFIG_NUMA
 #ifdef CONFIG_TMPFS
-static void shmem_show_mpol(struct seq_file *seq, unsigned short mode,
-			unsigned short flags, const nodemask_t policy_nodes)
+static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
 {
-	struct mempolicy temp;
 	char buffer[64];
 
-	if (mode == MPOL_DEFAULT)
+	if (!mpol || mpol->mode == MPOL_DEFAULT)
 		return;		/* show nothing */
 
-	temp.mode = mode;
-	temp.flags = flags;
-	temp.v.nodes = policy_nodes;
-
-	mpol_to_str(buffer, sizeof(buffer), &temp);
+	mpol_to_str(buffer, sizeof(buffer), mpol, 1);
 
 	seq_printf(seq, ",mpol=%s", buffer);
 }
+
+static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+	struct mempolicy *mpol = NULL;
+	if (sbinfo->mpol) {
+		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
+		mpol = sbinfo->mpol;
+		mpol_get(mpol);
+		spin_unlock(&sbinfo->stat_lock);
+	}
+	return mpol;
+}
 #endif /* CONFIG_TMPFS */
 
 static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
@@ -1135,8 +1141,7 @@
 }
 #else /* !CONFIG_NUMA */
 #ifdef CONFIG_TMPFS
-static inline void shmem_show_mpol(struct seq_file *seq, unsigned short policy,
-			unsigned short flags, const nodemask_t policy_nodes)
+static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
 {
 }
 #endif /* CONFIG_TMPFS */
@@ -1154,6 +1159,13 @@
 }
 #endif /* CONFIG_NUMA */
 
+#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
+static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+	return NULL;
+}
+#endif
+
 /*
  * shmem_getpage - either get the page from swap or allocate a new one
  *
@@ -1508,8 +1520,8 @@
 		case S_IFREG:
 			inode->i_op = &shmem_inode_operations;
 			inode->i_fop = &shmem_file_operations;
-			mpol_shared_policy_init(&info->policy, sbinfo->policy,
-					sbinfo->flags, &sbinfo->policy_nodes);
+			mpol_shared_policy_init(&info->policy,
+						 shmem_get_sbmpol(sbinfo));
 			break;
 		case S_IFDIR:
 			inc_nlink(inode);
@@ -1523,8 +1535,7 @@
 			 * Must not load anything in the rbtree,
 			 * mpol_free_shared_policy will not be called.
 			 */
-			mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 0,
-						NULL);
+			mpol_shared_policy_init(&info->policy, NULL);
 			break;
 		}
 	} else
@@ -2139,8 +2150,7 @@
 			if (*rest)
 				goto bad_val;
 		} else if (!strcmp(this_char,"mpol")) {
-			if (mpol_parse_str(value, &sbinfo->policy,
-					 &sbinfo->flags, &sbinfo->policy_nodes))
+			if (mpol_parse_str(value, &sbinfo->mpol, 1))
 				goto bad_val;
 		} else {
 			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
@@ -2191,9 +2201,9 @@
 	sbinfo->free_blocks = config.max_blocks - blocks;
 	sbinfo->max_inodes  = config.max_inodes;
 	sbinfo->free_inodes = config.max_inodes - inodes;
-	sbinfo->policy      = config.policy;
-	sbinfo->flags	    = config.flags;
-	sbinfo->policy_nodes = config.policy_nodes;
+
+	mpol_put(sbinfo->mpol);
+	sbinfo->mpol        = config.mpol;	/* transfers initial ref */
 out:
 	spin_unlock(&sbinfo->stat_lock);
 	return error;
@@ -2214,8 +2224,7 @@
 		seq_printf(seq, ",uid=%u", sbinfo->uid);
 	if (sbinfo->gid != 0)
 		seq_printf(seq, ",gid=%u", sbinfo->gid);
-	shmem_show_mpol(seq, sbinfo->policy, sbinfo->flags,
-			sbinfo->policy_nodes);
+	shmem_show_mpol(seq, sbinfo->mpol);
 	return 0;
 }
 #endif /* CONFIG_TMPFS */
@@ -2245,9 +2254,7 @@
 	sbinfo->mode = S_IRWXUGO | S_ISVTX;
 	sbinfo->uid = current->fsuid;
 	sbinfo->gid = current->fsgid;
-	sbinfo->policy = MPOL_DEFAULT;
-	sbinfo->flags = 0;
-	sbinfo->policy_nodes = node_states[N_HIGH_MEMORY];
+	sbinfo->mpol = NULL;
 	sb->s_fs_info = sbinfo;
 
 #ifdef CONFIG_TMPFS