mm: introduce a common interface for balloon pages mobility
Memory fragmentation introduced by ballooning might significantly reduce
the number of 2MB contiguous memory blocks that can be used within a guest,
thus imposing performance penalties associated with the reduced number of
transparent huge pages that could be used by the guest workload.
This patch introduces a common interface to help a balloon driver make
its page set movable by compaction, thus allowing the system to better
leverage the compaction efforts on memory defragmentation.
[akpm@linux-foundation.org: use PAGE_FLAGS_CHECK_AT_PREP, s/__balloon_page_flags/page_flags_cleared/, small cleanups]
[rientjes@google.com: allow balloon compaction for any system with memory compaction enabled, which is the defconfig]
Signed-off-by: Rafael Aquini <aquini@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
new file mode 100644
index 0000000..f7f1d71
--- /dev/null
+++ b/include/linux/balloon_compaction.h
@@ -0,0 +1,272 @@
+/*
+ * include/linux/balloon_compaction.h
+ *
+ * Common interface definitions for making balloon pages movable by compaction.
+ *
+ * Although migrating ballooned pages is perfectly possible, they are a special
+ * corner case for compaction scans because balloon pages are not enlisted on
+ * any LRU list like the other pages we compact / migrate.
+ *
+ * As the page isolation scanning step performed by a compaction thread is a
+ * lockless procedure (from a page standpoint), it might introduce races while
+ * performing balloon page compaction. In order to sort out these racy scenarios
+ * and safely perform balloon page compaction and migration we must always
+ * follow these three simple rules:
+ *
+ * i. when updating a balloon's page ->mapping element, strictly do it under
+ * the following lock order, independently of the far superior
+ * locking scheme (lru_lock, balloon_lock):
+ * +-page_lock(page);
+ * +--spin_lock_irq(&b_dev_info->pages_lock);
+ * ... page->mapping updates here ...
+ *
+ * ii. before isolating or dequeueing a balloon page from the balloon device
+ * pages list, the page reference counter must be raised by one and the
+ * extra refcount must be dropped when the page is enqueued back into
+ * the balloon device page list, thus a balloon page keeps its reference
+ * counter raised only while it is under our special handling;
+ *
+ * iii. after the lockless scan step has selected a potential balloon page for
+ * isolation, re-test the page->mapping flags and the page ref counter
+ * under the proper page lock, to ensure isolating a valid balloon page
+ * (not yet isolated, nor under release procedure)
+ *
+ * The functions provided by this interface are meant to help cope with
+ * the aforementioned balloon page corner case, as well as to ensure the simple
+ * set of exposed rules is satisfied while we are dealing with balloon page
+ * compaction / migration.
+ *
+ * Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com>
+ */
+#ifndef _LINUX_BALLOON_COMPACTION_H
+#define _LINUX_BALLOON_COMPACTION_H
+#include <linux/pagemap.h>
+#include <linux/page-flags.h>
+#include <linux/migrate.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+
+/*
+ * Balloon device information descriptor.
+ * This struct allows the common balloon compaction interface procedures to
+ * find the balloon device holding the memory pages they have to handle for
+ * page compaction / migration, and it also serves the balloon driver as a
+ * page book-keeper for its registered balloon devices.
+ */
+struct balloon_dev_info {
+ void *balloon_device; /* balloon device descriptor */
+ struct address_space *mapping; /* balloon special page->mapping */
+ unsigned long isolated_pages; /* # of isolated pages for migration */
+ spinlock_t pages_lock; /* Protects the pages list */
+ struct list_head pages; /* Pages enqueued & handed to Host */
+};
+
+extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info);
+extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info);
+extern struct balloon_dev_info *balloon_devinfo_alloc(
+ void *balloon_dev_descriptor);
+
+/*
+ * balloon_devinfo_free - release a balloon device information descriptor
+ * @b_dev_info: descriptor to be released
+ *
+ * Only the descriptor itself is handed to kfree(); nothing is done here about
+ * the pages list or the mapping it refers to. Presumably the counterpart of
+ * balloon_devinfo_alloc(), whose definition is not part of this header.
+ */
+static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
+{
+ kfree(b_dev_info);
+}
+
+/*
+ * balloon_page_free - release a balloon page back to the page free lists
+ * @page: ballooned page to be set free
+ *
+ * This function must be used to properly set free an isolated/dequeued balloon
+ * page at the end of a successful page migration, or at the balloon driver's
+ * page release procedure.
+ */
+static inline void balloon_page_free(struct page *page)
+{
+ /*
+ * Balloon pages always get an extra refcount before being isolated
+ * and before being dequeued, to help sort out fortuitous collisions
+ * between a thread attempting to isolate and another thread attempting
+ * to release the very same balloon page.
+ *
+ * Before we hand the page back to Buddy, let's drop its extra refcnt.
+ */
+ put_page(page);
+ __free_page(page);
+}
+
+#ifdef CONFIG_BALLOON_COMPACTION
+extern bool balloon_page_isolate(struct page *page);
+extern void balloon_page_putback(struct page *page);
+extern int balloon_page_migrate(struct page *newpage,
+ struct page *page, enum migrate_mode mode);
+extern struct address_space
+*balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
+ const struct address_space_operations *a_ops);
+
+/*
+ * balloon_mapping_free - release a balloon special page->mapping
+ * @balloon_mapping: address_space object to be released with kfree()
+ */
+static inline void balloon_mapping_free(struct address_space *balloon_mapping)
+{
+ kfree(balloon_mapping);
+}
+
+/*
+ * page_flags_cleared - helper to perform balloon @page ->flags tests.
+ *
+ * Balloon pages come from buddy and the driver never touches page->flags
+ * (except for taking the page lock for compaction), so a ballooned page
+ * can be safely identified by having every PAGE_FLAGS_CHECK_AT_PREP bit in
+ * page->flags cleared. This test also makes us skip ballooned pages that
+ * are locked for compaction or release, which mitigates the racy check
+ * done at balloon_page_movable().
+ */
+static inline bool page_flags_cleared(struct page *page)
+{
+	return (page->flags & PAGE_FLAGS_CHECK_AT_PREP) == 0;
+}
+
+/*
+ * __is_movable_balloon_page - check @page->mapping for AS_BALLOON_MAP
+ */
+static inline bool __is_movable_balloon_page(struct page *page)
+{
+	/* a NULL ->mapping is handled by mapping_balloon() itself */
+	return mapping_balloon(page->mapping);
+}
+
+/*
+ * balloon_page_movable - test page->mapping->flags to identify balloon pages
+ * that can be moved by compaction/migration.
+ *
+ * Core compaction calls this from its page isolation scan, where most of the
+ * pages it sees are not balloon pages at all. To avoid undesired side effects
+ * such as racing against __free_pages(), the page cannot be kept locked while
+ * page->mapping->flags is tested here.
+ *
+ * A false positive is therefore possible when a balloon page is released
+ * under us, so page->mapping->flags must be re-tested later, under the
+ * proper page lock, by the functions that actually handle the balloon page
+ * case.
+ */
+static inline bool balloon_page_movable(struct page *page)
+{
+	/*
+	 * Rule out pages that use ->mapping in a different way before
+	 * mapping->flags gets dereferenced and tested.
+	 */
+	if (!page_flags_cleared(page) || page_mapped(page) ||
+	    page_count(page) != 1)
+		return false;
+
+	return __is_movable_balloon_page(page);
+}
+
+/*
+ * balloon_page_insert - insert a page into the balloon's page list and make
+ * the page->mapping assignment accordingly.
+ * @page : page to be assigned as a 'balloon page'
+ * @mapping : allocated special 'balloon_mapping'
+ * @head : balloon's device page list head
+ *
+ * Caller must ensure the page is locked and the spin_lock protecting the
+ * balloon pages list is held before inserting a page into the balloon device.
+ */
+static inline void balloon_page_insert(struct page *page,
+ struct address_space *mapping,
+ struct list_head *head)
+{
+ page->mapping = mapping;
+ /* page->lru is the linkage used to chain the page on @head */
+ list_add(&page->lru, head);
+}
+
+/*
+ * balloon_page_delete - delete a page from balloon's page list and clear
+ * the page->mapping assignment accordingly.
+ * @page : page to be released from balloon's page list
+ *
+ * Caller must ensure the page is locked and the spin_lock protecting the
+ * balloon pages list is held before deleting a page from the balloon device.
+ */
+static inline void balloon_page_delete(struct page *page)
+{
+ page->mapping = NULL;
+ list_del(&page->lru);
+}
+
+/*
+ * balloon_page_device - get the b_dev_info descriptor for the balloon device
+ * that enqueues the given page.
+ *
+ * Returns page->mapping->private_data, or NULL if the page has no ->mapping.
+ */
+static inline struct balloon_dev_info *balloon_page_device(struct page *page)
+{
+	struct address_space *as = page->mapping;
+
+	if (unlikely(!as))
+		return NULL;
+
+	return as->private_data;
+}
+
+/*
+ * balloon_mapping_gfp_mask - gfp mask reported when CONFIG_BALLOON_COMPACTION
+ * is enabled: GFP_HIGHUSER_MOVABLE.
+ *
+ * NOTE(review): presumably the mask the balloon driver uses to allocate
+ * balloon pages -- confirm against the driver, no caller is visible here.
+ */
+static inline gfp_t balloon_mapping_gfp_mask(void)
+{
+ return GFP_HIGHUSER_MOVABLE;
+}
+
+/*
+ * balloon_compaction_check - report whether balloon compaction support is
+ * built in. This is the CONFIG_BALLOON_COMPACTION variant: always true.
+ */
+static inline bool balloon_compaction_check(void)
+{
+ return true;
+}
+
+#else /* !CONFIG_BALLOON_COMPACTION */
+
+/*
+ * balloon_mapping_alloc - stub used when balloon compaction is compiled out
+ * @b_dev_info: balloon device descriptor (unused)
+ * @a_ops: balloon's address_space_operations (unused)
+ *
+ * The prototype mirrors the CONFIG_BALLOON_COMPACTION one, so that callers
+ * are type-checked the same way in both configurations instead of having
+ * any pointer silently accepted through 'void *'.
+ *
+ * Always returns ERR_PTR(-EOPNOTSUPP).
+ */
+static inline struct address_space
+*balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
+			const struct address_space_operations *a_ops)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+/*
+ * balloon_mapping_free - no-op used when balloon compaction is compiled out;
+ * @balloon_mapping is ignored.
+ */
+static inline void balloon_mapping_free(struct address_space *balloon_mapping)
+{
+}
+
+/*
+ * balloon_page_insert - !CONFIG_BALLOON_COMPACTION variant: the page is only
+ * added to the @head list; @mapping is accepted but not stored in the page.
+ */
+static inline void balloon_page_insert(struct page *page,
+ struct address_space *mapping,
+ struct list_head *head)
+{
+ list_add(&page->lru, head);
+}
+
+/*
+ * balloon_page_delete - !CONFIG_BALLOON_COMPACTION variant: only unlinks
+ * page->lru from its list; page->mapping is not touched.
+ */
+static inline void balloon_page_delete(struct page *page)
+{
+ list_del(&page->lru);
+}
+
+/*
+ * balloon_page_movable - !CONFIG_BALLOON_COMPACTION variant: @page is not
+ * inspected and the answer is always false.
+ */
+static inline bool balloon_page_movable(struct page *page)
+{
+ return false;
+}
+
+/*
+ * balloon_page_isolate - !CONFIG_BALLOON_COMPACTION variant: @page is left
+ * untouched and false is always returned.
+ */
+static inline bool balloon_page_isolate(struct page *page)
+{
+ return false;
+}
+
+/*
+ * balloon_page_putback - no-op used when balloon compaction is compiled out;
+ * @page is left untouched.
+ */
+static inline void balloon_page_putback(struct page *page)
+{
+}
+
+/*
+ * balloon_page_migrate - !CONFIG_BALLOON_COMPACTION variant: touches neither
+ * @newpage nor @page, ignores @mode and always returns 0.
+ *
+ * NOTE(review): 0 is the value of MIGRATEPAGE_SUCCESS -- confirm that
+ * reporting success is intended for a call that migrates nothing.
+ */
+static inline int balloon_page_migrate(struct page *newpage,
+ struct page *page, enum migrate_mode mode)
+{
+ return 0;
+}
+
+/*
+ * balloon_mapping_gfp_mask - !CONFIG_BALLOON_COMPACTION variant: GFP_HIGHUSER,
+ * instead of the GFP_HIGHUSER_MOVABLE mask returned when balloon compaction
+ * is enabled.
+ */
+static inline gfp_t balloon_mapping_gfp_mask(void)
+{
+ return GFP_HIGHUSER;
+}
+
+/*
+ * balloon_compaction_check - report whether balloon compaction support is
+ * built in. This is the !CONFIG_BALLOON_COMPACTION variant: always false.
+ */
+static inline bool balloon_compaction_check(void)
+{
+ return false;
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+#endif /* _LINUX_BALLOON_COMPACTION_H */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index a4e886d..ce42847 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -11,8 +11,18 @@
* Return values from addresss_space_operations.migratepage():
* - negative errno on page migration failure;
* - zero on page migration success;
+ *
+ * The balloon page migration introduces this special case where a 'distinct'
+ * return code is used to flag a successful page migration to unmap_and_move().
+ * This approach is necessary because page migration can race against balloon
+ * deflation procedure, and for such case we could introduce a nasty page leak
+ * if a successfully migrated balloon page gets released concurrently with
+ * migration's unmap_and_move() wrap-up steps.
*/
#define MIGRATEPAGE_SUCCESS 0
+#define MIGRATEPAGE_BALLOON_SUCCESS 1 /* special ret code for balloon page
+ * successful migration case.
+ */
#ifdef CONFIG_MIGRATION
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e42c762..6da609d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -24,6 +24,7 @@
AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */
AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */
AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */
+ AS_BALLOON_MAP = __GFP_BITS_SHIFT + 4, /* balloon page special map */
};
static inline void mapping_set_error(struct address_space *mapping, int error)
@@ -53,6 +54,21 @@
return !!mapping;
}
+/*
+ * Set AS_BALLOON_MAP in @mapping->flags. @mapping is dereferenced
+ * unconditionally, so it must not be NULL.
+ */
+static inline void mapping_set_balloon(struct address_space *mapping)
+{
+ set_bit(AS_BALLOON_MAP, &mapping->flags);
+}
+
+/*
+ * Clear AS_BALLOON_MAP in @mapping->flags. @mapping is dereferenced
+ * unconditionally, so it must not be NULL.
+ */
+static inline void mapping_clear_balloon(struct address_space *mapping)
+{
+ clear_bit(AS_BALLOON_MAP, &mapping->flags);
+}
+
+/*
+ * Test whether @mapping carries AS_BALLOON_MAP.
+ *
+ * Returns 1 when @mapping is non-NULL and has the bit set, 0 otherwise
+ * (a NULL @mapping is tolerated and yields 0).
+ */
+static inline int mapping_balloon(struct address_space *mapping)
+{
+ return mapping && test_bit(AS_BALLOON_MAP, &mapping->flags);
+}
+
static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
{
return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;