blob: 096efd73eb4ce0d8bafeca374523bb09fe9c4567 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/fs/nfs/pagelist.c
3 *
4 * A set of helper functions for managing NFS read and write requests.
5 * The main purpose of these routines is to provide support for the
6 * coalescing of several requests into a single RPC call.
7 *
8 * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
9 *
10 */
11
Linus Torvalds1da177e2005-04-16 15:20:36 -070012#include <linux/slab.h>
13#include <linux/file.h>
14#include <linux/sunrpc/clnt.h>
15#include <linux/nfs3.h>
16#include <linux/nfs4.h>
17#include <linux/nfs_page.h>
18#include <linux/nfs_fs.h>
19#include <linux/nfs_mount.h>
20
Trond Myklebust8d5658c2007-04-10 09:26:35 -040021#include "internal.h"
22
/* When defined, nfs_release_request() performs extra BUG_ON() sanity checks. */
#define NFS_PARANOIA 1

/*
 * Slab cache from which every struct nfs_page is allocated.  Created by
 * nfs_init_nfspagecache() and torn down by nfs_destroy_nfspagecache().
 */
static struct kmem_cache *nfs_page_cachep;
Linus Torvalds1da177e2005-04-16 15:20:36 -070026
27static inline struct nfs_page *
28nfs_page_alloc(void)
29{
30 struct nfs_page *p;
Christoph Lametere94b1762006-12-06 20:33:17 -080031 p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -070032 if (p) {
33 memset(p, 0, sizeof(*p));
34 INIT_LIST_HEAD(&p->wb_list);
35 }
36 return p;
37}
38
39static inline void
40nfs_page_free(struct nfs_page *p)
41{
42 kmem_cache_free(nfs_page_cachep, p);
43}
44
45/**
46 * nfs_create_request - Create an NFS read/write request.
47 * @file: file descriptor to use
48 * @inode: inode to which the request is attached
49 * @page: page to write
50 * @offset: starting offset within the page for the write
51 * @count: number of bytes to read/write
52 *
53 * The page must be locked by the caller. This makes sure we never
54 * create two different requests for the same page, and avoids
55 * a possible deadlock when we reach the hard limit on the number
56 * of dirty pages.
57 * User should ensure it is safe to sleep in this function.
58 */
59struct nfs_page *
60nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
61 struct page *page,
62 unsigned int offset, unsigned int count)
63{
64 struct nfs_server *server = NFS_SERVER(inode);
65 struct nfs_page *req;
66
67 /* Deal with hard limits. */
68 for (;;) {
69 /* try to allocate the request struct */
70 req = nfs_page_alloc();
71 if (req != NULL)
72 break;
73
74 /* Try to free up at least one request in order to stay
75 * below the hard limit
76 */
77 if (signalled() && (server->flags & NFS_MOUNT_INTR))
78 return ERR_PTR(-ERESTARTSYS);
79 yield();
80 }
81
82 /* Initialize the request struct. Initially, we assume a
83 * long write-back delay. This will be adjusted in
84 * update_nfs_request below if the region is not locked. */
85 req->wb_page = page;
86 atomic_set(&req->wb_complete, 0);
87 req->wb_index = page->index;
88 page_cache_get(page);
Trond Myklebustcd52ed32006-03-20 13:44:04 -050089 BUG_ON(PagePrivate(page));
90 BUG_ON(!PageLocked(page));
91 BUG_ON(page->mapping->host != inode);
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 req->wb_offset = offset;
93 req->wb_pgbase = offset;
94 req->wb_bytes = count;
95 atomic_set(&req->wb_count, 1);
96 req->wb_context = get_nfs_open_context(ctx);
97
98 return req;
99}
100
101/**
102 * nfs_unlock_request - Unlock request and wake up sleepers.
103 * @req:
104 */
105void nfs_unlock_request(struct nfs_page *req)
106{
107 if (!NFS_WBACK_BUSY(req)) {
108 printk(KERN_ERR "NFS: Invalid unlock attempted\n");
109 BUG();
110 }
111 smp_mb__before_clear_bit();
112 clear_bit(PG_BUSY, &req->wb_flags);
113 smp_mb__after_clear_bit();
Trond Myklebust464a98b2005-06-22 17:16:21 +0000114 wake_up_bit(&req->wb_flags, PG_BUSY);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 nfs_release_request(req);
116}
117
118/**
Trond Myklebustc6a556b2005-06-22 17:16:30 +0000119 * nfs_set_page_writeback_locked - Lock a request for writeback
120 * @req:
121 */
122int nfs_set_page_writeback_locked(struct nfs_page *req)
123{
124 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
125
126 if (!nfs_lock_request(req))
127 return 0;
128 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
129 return 1;
130}
131
132/**
133 * nfs_clear_page_writeback - Unlock request and wake up sleepers
134 */
135void nfs_clear_page_writeback(struct nfs_page *req)
136{
137 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
138
Trond Myklebustdeb7d632006-03-20 13:44:50 -0500139 if (req->wb_page != NULL) {
140 spin_lock(&nfsi->req_lock);
141 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
142 spin_unlock(&nfsi->req_lock);
143 }
Trond Myklebustc6a556b2005-06-22 17:16:30 +0000144 nfs_unlock_request(req);
145}
146
147/**
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 * nfs_clear_request - Free up all resources allocated to the request
149 * @req:
150 *
151 * Release page resources associated with a write request after it
152 * has completed.
153 */
154void nfs_clear_request(struct nfs_page *req)
155{
Trond Myklebustcd52ed32006-03-20 13:44:04 -0500156 struct page *page = req->wb_page;
157 if (page != NULL) {
Trond Myklebustcd52ed32006-03-20 13:44:04 -0500158 page_cache_release(page);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 req->wb_page = NULL;
160 }
161}
162
163
164/**
165 * nfs_release_request - Release the count on an NFS read/write request
166 * @req: request to release
167 *
168 * Note: Should never be called with the spinlock held!
169 */
170void
171nfs_release_request(struct nfs_page *req)
172{
173 if (!atomic_dec_and_test(&req->wb_count))
174 return;
175
176#ifdef NFS_PARANOIA
177 BUG_ON (!list_empty(&req->wb_list));
178 BUG_ON (NFS_WBACK_BUSY(req));
179#endif
180
181 /* Release struct file or cached credential */
182 nfs_clear_request(req);
183 put_nfs_open_context(req->wb_context);
184 nfs_page_free(req);
185}
186
Trond Myklebust464a98b2005-06-22 17:16:21 +0000187static int nfs_wait_bit_interruptible(void *word)
188{
189 int ret = 0;
190
191 if (signal_pending(current))
192 ret = -ERESTARTSYS;
193 else
194 schedule();
195 return ret;
196}
197
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198/**
199 * nfs_wait_on_request - Wait for a request to complete.
200 * @req: request to wait upon.
201 *
202 * Interruptible by signals only if mounted with intr flag.
203 * The user is responsible for holding a count on the request.
204 */
205int
206nfs_wait_on_request(struct nfs_page *req)
207{
Trond Myklebust464a98b2005-06-22 17:16:21 +0000208 struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
209 sigset_t oldmask;
210 int ret = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211
Trond Myklebust464a98b2005-06-22 17:16:21 +0000212 if (!test_bit(PG_BUSY, &req->wb_flags))
213 goto out;
214 /*
215 * Note: the call to rpc_clnt_sigmask() suffices to ensure that we
216 * are not interrupted if intr flag is not set
217 */
218 rpc_clnt_sigmask(clnt, &oldmask);
219 ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
220 nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE);
221 rpc_clnt_sigunmask(clnt, &oldmask);
222out:
223 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224}
225
226/**
Trond Myklebustd8a5ad72007-04-02 18:48:28 -0400227 * nfs_pageio_init - initialise a page io descriptor
228 * @desc: pointer to descriptor
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400229 * @inode: pointer to inode
230 * @doio: pointer to io function
231 * @bsize: io block size
232 * @io_flags: extra parameters for the io function
Trond Myklebustd8a5ad72007-04-02 18:48:28 -0400233 */
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400234void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
235 struct inode *inode,
Trond Myklebust8d5658c2007-04-10 09:26:35 -0400236 int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400237 unsigned int bsize,
238 int io_flags)
Trond Myklebustd8a5ad72007-04-02 18:48:28 -0400239{
240 INIT_LIST_HEAD(&desc->pg_list);
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400241 desc->pg_bytes_written = 0;
Trond Myklebustd8a5ad72007-04-02 18:48:28 -0400242 desc->pg_count = 0;
243 desc->pg_bsize = bsize;
244 desc->pg_base = 0;
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400245 desc->pg_inode = inode;
246 desc->pg_doio = doio;
247 desc->pg_ioflags = io_flags;
248 desc->pg_error = 0;
Trond Myklebustd8a5ad72007-04-02 18:48:28 -0400249}
250
251/**
252 * nfs_can_coalesce_requests - test two requests for compatibility
253 * @prev: pointer to nfs_page
254 * @req: pointer to nfs_page
255 *
256 * The nfs_page structures 'prev' and 'req' are compared to ensure that the
257 * page data area they describe is contiguous, and that their RPC
258 * credentials, NFSv4 open state, and lockowners are the same.
259 *
260 * Return 'true' if this is the case, else return 'false'.
261 */
262static int nfs_can_coalesce_requests(struct nfs_page *prev,
263 struct nfs_page *req)
264{
265 if (req->wb_context->cred != prev->wb_context->cred)
266 return 0;
267 if (req->wb_context->lockowner != prev->wb_context->lockowner)
268 return 0;
269 if (req->wb_context->state != prev->wb_context->state)
270 return 0;
271 if (req->wb_index != (prev->wb_index + 1))
272 return 0;
273 if (req->wb_pgbase != 0)
274 return 0;
275 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
276 return 0;
277 return 1;
278}
279
280/**
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400281 * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
Trond Myklebustd8a5ad72007-04-02 18:48:28 -0400282 * @desc: destination io descriptor
283 * @req: request
284 *
285 * Returns true if the request 'req' was successfully coalesced into the
286 * existing list of pages 'desc'.
287 */
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400288static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
289 struct nfs_page *req)
Trond Myklebustd8a5ad72007-04-02 18:48:28 -0400290{
291 size_t newlen = req->wb_bytes;
292
293 if (desc->pg_count != 0) {
294 struct nfs_page *prev;
295
296 /*
297 * FIXME: ideally we should be able to coalesce all requests
298 * that are not block boundary aligned, but currently this
299 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
300 * since nfs_flush_multi and nfs_pagein_multi assume you
301 * can have only one struct nfs_page.
302 */
Trond Myklebust8d5658c2007-04-10 09:26:35 -0400303 if (desc->pg_bsize < PAGE_SIZE)
304 return 0;
Trond Myklebustd8a5ad72007-04-02 18:48:28 -0400305 newlen += desc->pg_count;
Trond Myklebust8d5658c2007-04-10 09:26:35 -0400306 if (newlen > desc->pg_bsize)
Trond Myklebustd8a5ad72007-04-02 18:48:28 -0400307 return 0;
308 prev = nfs_list_entry(desc->pg_list.prev);
309 if (!nfs_can_coalesce_requests(prev, req))
310 return 0;
311 } else
312 desc->pg_base = req->wb_pgbase;
313 nfs_list_remove_request(req);
314 nfs_list_add_request(req, &desc->pg_list);
315 desc->pg_count = newlen;
316 return 1;
317}
318
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400319/*
320 * Helper for nfs_pageio_add_request and nfs_pageio_complete
321 */
322static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
323{
324 if (!list_empty(&desc->pg_list)) {
325 int error = desc->pg_doio(desc->pg_inode,
326 &desc->pg_list,
Trond Myklebust8d5658c2007-04-10 09:26:35 -0400327 nfs_page_array_len(desc->pg_base,
328 desc->pg_count),
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400329 desc->pg_count,
330 desc->pg_ioflags);
331 if (error < 0)
332 desc->pg_error = error;
333 else
334 desc->pg_bytes_written += desc->pg_count;
335 }
336 if (list_empty(&desc->pg_list)) {
337 desc->pg_count = 0;
338 desc->pg_base = 0;
339 }
340}
341
342/**
343 * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
344 * @desc: destination io descriptor
345 * @req: request
346 *
347 * Returns true if the request 'req' was successfully coalesced into the
348 * existing list of pages 'desc'.
349 */
Trond Myklebust8b09bee2007-04-02 18:48:28 -0400350int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
351 struct nfs_page *req)
Trond Myklebustbcb71bb2007-04-02 18:48:28 -0400352{
353 while (!nfs_pageio_do_add_request(desc, req)) {
354 nfs_pageio_doio(desc);
355 if (desc->pg_error < 0)
356 return 0;
357 }
358 return 1;
359}
360
/**
 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
 * @desc: pointer to io descriptor
 *
 * Sends off whatever requests are still queued on the descriptor by
 * calling nfs_pageio_doio().  Any error ends up in desc->pg_error.
 */
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
	nfs_pageio_doio(desc);
}
369
Trond Myklebust3da28eb2005-06-22 17:16:31 +0000370#define NFS_SCAN_MAXENTRIES 16
371/**
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372 * nfs_scan_list - Scan a list for matching requests
Trond Myklebustd2ccddf2006-05-31 01:13:38 -0400373 * @nfsi: NFS inode
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 * @head: One of the NFS inode request lists
375 * @dst: Destination list
376 * @idx_start: lower bound of page->index to scan
377 * @npages: idx_start + npages sets the upper bound to scan.
378 *
379 * Moves elements from one of the inode request lists.
380 * If the number of requests is set to 0, the entire address_space
381 * starting at index idx_start, is scanned.
382 * The requests are *not* checked to ensure that they form a contiguous set.
383 * You must be holding the inode's req_lock when calling this function
384 */
Trond Myklebustd2ccddf2006-05-31 01:13:38 -0400385int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
386 struct list_head *dst, unsigned long idx_start,
387 unsigned int npages)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388{
Trond Myklebustd2ccddf2006-05-31 01:13:38 -0400389 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
390 struct nfs_page *req;
391 unsigned long idx_end;
392 int found, i;
393 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394
395 res = 0;
396 if (npages == 0)
397 idx_end = ~0;
398 else
399 idx_end = idx_start + npages - 1;
400
Trond Myklebustd2ccddf2006-05-31 01:13:38 -0400401 for (;;) {
402 found = radix_tree_gang_lookup(&nfsi->nfs_page_tree,
403 (void **)&pgvec[0], idx_start,
404 NFS_SCAN_MAXENTRIES);
405 if (found <= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 break;
Trond Myklebustd2ccddf2006-05-31 01:13:38 -0400407 for (i = 0; i < found; i++) {
408 req = pgvec[i];
409 if (req->wb_index > idx_end)
410 goto out;
411 idx_start = req->wb_index + 1;
412 if (req->wb_list_head != head)
413 continue;
414 if (nfs_set_page_writeback_locked(req)) {
415 nfs_list_remove_request(req);
416 nfs_list_add_request(req, dst);
417 res++;
418 }
419 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 }
Trond Myklebustd2ccddf2006-05-31 01:13:38 -0400422out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 return res;
424}
425
David Howellsf7b422b2006-06-09 09:34:33 -0400426int __init nfs_init_nfspagecache(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427{
428 nfs_page_cachep = kmem_cache_create("nfs_page",
429 sizeof(struct nfs_page),
430 0, SLAB_HWCACHE_ALIGN,
431 NULL, NULL);
432 if (nfs_page_cachep == NULL)
433 return -ENOMEM;
434
435 return 0;
436}
437
David Brownell266bee82006-06-27 12:59:15 -0700438void nfs_destroy_nfspagecache(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439{
Alexey Dobriyan1a1d92c2006-09-27 01:49:40 -0700440 kmem_cache_destroy(nfs_page_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441}
442