Merge git://git.kvack.org/~bcrl/aio-next
Pull AIO leak fixes from Ben LaHaise:
"I've put these two patches plus Linus's change through a round of
tests, and it passes millions of iterations of the aio numa
migratepage test, as well as a number of repetitions of a few simple
read and write tests.
The first patch fixes the memory leak Kent introduced, while the
second patch makes aio_migratepage() much more paranoid and robust"
* git://git.kvack.org/~bcrl/aio-next:
aio/migratepages: make aio migrate pages sane
aio: fix kioctx leak introduced by "aio: Fix a trinity splat"
diff --git a/fs/aio.c b/fs/aio.c
index efa708b..062a5f6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -362,7 +362,7 @@
struct aio_ring *ring;
unsigned nr_events = ctx->max_reqs;
struct mm_struct *mm = current->mm;
- unsigned long size, populate;
+ unsigned long size, unused;
int nr_pages;
int i;
struct file *file;
@@ -383,18 +383,6 @@
return -EAGAIN;
}
- for (i = 0; i < nr_pages; i++) {
- struct page *page;
- page = find_or_create_page(file->f_inode->i_mapping,
- i, GFP_HIGHUSER | __GFP_ZERO);
- if (!page)
- break;
- pr_debug("pid(%d) page[%d]->count=%d\n",
- current->pid, i, page_count(page));
- SetPageUptodate(page);
- SetPageDirty(page);
- unlock_page(page);
- }
ctx->aio_ring_file = file;
nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
/ sizeof(struct io_event);
@@ -409,15 +397,36 @@
}
}
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page;
+ page = find_or_create_page(file->f_inode->i_mapping,
+ i, GFP_HIGHUSER | __GFP_ZERO);
+ if (!page)
+ break;
+ pr_debug("pid(%d) page[%d]->count=%d\n",
+ current->pid, i, page_count(page));
+ SetPageUptodate(page);
+ SetPageDirty(page);
+ unlock_page(page);
+
+ ctx->ring_pages[i] = page;
+ }
+ ctx->nr_pages = i;
+
+ if (unlikely(i != nr_pages)) {
+ aio_free_ring(ctx);
+ return -EAGAIN;
+ }
+
ctx->mmap_size = nr_pages * PAGE_SIZE;
pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
down_write(&mm->mmap_sem);
ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, 0, &populate);
+ MAP_SHARED, 0, &unused);
+ up_write(&mm->mmap_sem);
if (IS_ERR((void *)ctx->mmap_base)) {
- up_write(&mm->mmap_sem);
ctx->mmap_size = 0;
aio_free_ring(ctx);
return -EAGAIN;
@@ -425,27 +434,6 @@
pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
- /* We must do this while still holding mmap_sem for write, as we
- * need to be protected against userspace attempting to mremap()
- * or munmap() the ring buffer.
- */
- ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
- 1, 0, ctx->ring_pages, NULL);
-
- /* Dropping the reference here is safe as the page cache will hold
- * onto the pages for us. It is also required so that page migration
- * can unmap the pages and get the right reference count.
- */
- for (i = 0; i < ctx->nr_pages; i++)
- put_page(ctx->ring_pages[i]);
-
- up_write(&mm->mmap_sem);
-
- if (unlikely(ctx->nr_pages != nr_pages)) {
- aio_free_ring(ctx);
- return -EAGAIN;
- }
-
ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */