blob: 59389a8801bc615cf952bd2f7ac76abacd70ae47 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/**
2 * aops.c - NTFS kernel address space operations and page cache handling.
3 * Part of the Linux-NTFS project.
4 *
Anton Altaparmakovb6ad6c52005-02-15 10:08:43 +00005 * Copyright (c) 2001-2005 Anton Altaparmakov
Linus Torvalds1da177e2005-04-16 15:20:36 -07006 * Copyright (c) 2002 Richard Russon
7 *
8 * This program/include file is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as published
10 * by the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program/include file is distributed in the hope that it will be
14 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program (in the main directory of the Linux-NTFS
20 * distribution in the file COPYING); if not, write to the Free Software
21 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include <linux/errno.h>
25#include <linux/mm.h>
26#include <linux/pagemap.h>
27#include <linux/swap.h>
28#include <linux/buffer_head.h>
29#include <linux/writeback.h>
30
31#include "aops.h"
32#include "attrib.h"
33#include "debug.h"
34#include "inode.h"
35#include "mft.h"
36#include "runlist.h"
37#include "types.h"
38#include "ntfs.h"
39
40/**
41 * ntfs_end_buffer_async_read - async io completion for reading attributes
42 * @bh: buffer head on which io is completed
43 * @uptodate: whether @bh is now uptodate or not
44 *
45 * Asynchronous I/O completion handler for reading pages belonging to the
46 * attribute address space of an inode. The inodes can either be files or
47 * directories or they can be fake inodes describing some attribute.
48 *
49 * If NInoMstProtected(), perform the post read mst fixups when all IO on the
50 * page has been completed and mark the page uptodate or set the error bit on
51 * the page. To determine the size of the records that need fixing up, we
52 * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
53 * record size, and index_block_size_bits, to the log(base 2) of the ntfs
54 * record size.
55 */
56static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
57{
58 static DEFINE_SPINLOCK(page_uptodate_lock);
59 unsigned long flags;
60 struct buffer_head *tmp;
61 struct page *page;
62 ntfs_inode *ni;
63 int page_uptodate = 1;
64
65 page = bh->b_page;
66 ni = NTFS_I(page->mapping->host);
67
68 if (likely(uptodate)) {
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +000069 s64 file_ofs, initialized_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -070070
71 set_buffer_uptodate(bh);
72
73 file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
74 bh_offset(bh);
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +000075 read_lock_irqsave(&ni->size_lock, flags);
76 initialized_size = ni->initialized_size;
77 read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -070078 /* Check for the current buffer head overflowing. */
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +000079 if (file_ofs + bh->b_size > initialized_size) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 char *addr;
81 int ofs = 0;
82
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +000083 if (file_ofs < initialized_size)
84 ofs = initialized_size - file_ofs;
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
86 memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
87 flush_dcache_page(page);
88 kunmap_atomic(addr, KM_BIO_SRC_IRQ);
89 }
90 } else {
91 clear_buffer_uptodate(bh);
92 ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
93 (unsigned long long)bh->b_blocknr);
94 SetPageError(page);
95 }
96 spin_lock_irqsave(&page_uptodate_lock, flags);
97 clear_buffer_async_read(bh);
98 unlock_buffer(bh);
99 tmp = bh;
100 do {
101 if (!buffer_uptodate(tmp))
102 page_uptodate = 0;
103 if (buffer_async_read(tmp)) {
104 if (likely(buffer_locked(tmp)))
105 goto still_busy;
106 /* Async buffers must be locked. */
107 BUG();
108 }
109 tmp = tmp->b_this_page;
110 } while (tmp != bh);
111 spin_unlock_irqrestore(&page_uptodate_lock, flags);
112 /*
113 * If none of the buffers had errors then we can set the page uptodate,
114 * but we first have to perform the post read mst fixups, if the
115 * attribute is mst protected, i.e. if NInoMstProteced(ni) is true.
116 * Note we ignore fixup errors as those are detected when
117 * map_mft_record() is called which gives us per record granularity
118 * rather than per page granularity.
119 */
120 if (!NInoMstProtected(ni)) {
121 if (likely(page_uptodate && !PageError(page)))
122 SetPageUptodate(page);
123 } else {
124 char *addr;
125 unsigned int i, recs;
126 u32 rec_size;
127
128 rec_size = ni->itype.index.block_size;
129 recs = PAGE_CACHE_SIZE / rec_size;
130 /* Should have been verified before we got here... */
131 BUG_ON(!recs);
132 addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
133 for (i = 0; i < recs; i++)
134 post_read_mst_fixup((NTFS_RECORD*)(addr +
135 i * rec_size), rec_size);
136 flush_dcache_page(page);
137 kunmap_atomic(addr, KM_BIO_SRC_IRQ);
Anton Altaparmakovb6ad6c52005-02-15 10:08:43 +0000138 if (likely(page_uptodate && !PageError(page)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 SetPageUptodate(page);
140 }
141 unlock_page(page);
142 return;
143still_busy:
144 spin_unlock_irqrestore(&page_uptodate_lock, flags);
145 return;
146}
147
148/**
149 * ntfs_read_block - fill a @page of an address space with data
150 * @page: page cache page to fill with data
151 *
152 * Fill the page @page of the address space belonging to the @page->host inode.
153 * We read each buffer asynchronously and when all buffers are read in, our io
154 * completion handler ntfs_end_buffer_read_async(), if required, automatically
155 * applies the mst fixups to the page before finally marking it uptodate and
156 * unlocking it.
157 *
158 * We only enforce allocated_size limit because i_size is checked for in
159 * generic_file_read().
160 *
161 * Return 0 on success and -errno on error.
162 *
163 * Contains an adapted version of fs/buffer.c::block_read_full_page().
164 */
165static int ntfs_read_block(struct page *page)
166{
167 VCN vcn;
168 LCN lcn;
169 ntfs_inode *ni;
170 ntfs_volume *vol;
171 runlist_element *rl;
172 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
173 sector_t iblock, lblock, zblock;
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000174 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 unsigned int blocksize, vcn_ofs;
176 int i, nr;
177 unsigned char blocksize_bits;
178
179 ni = NTFS_I(page->mapping->host);
180 vol = ni->vol;
181
182 /* $MFT/$DATA must have its complete runlist in memory at all times. */
183 BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
184
185 blocksize_bits = VFS_I(ni)->i_blkbits;
186 blocksize = 1 << blocksize_bits;
187
188 if (!page_has_buffers(page))
189 create_empty_buffers(page, blocksize, 0);
190 bh = head = page_buffers(page);
191 if (unlikely(!bh)) {
192 unlock_page(page);
193 return -ENOMEM;
194 }
195
196 iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000197 read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
199 zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000200 read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201
202 /* Loop through all the buffers in the page. */
203 rl = NULL;
204 nr = i = 0;
205 do {
206 u8 *kaddr;
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100207 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208
209 if (unlikely(buffer_uptodate(bh)))
210 continue;
211 if (unlikely(buffer_mapped(bh))) {
212 arr[nr++] = bh;
213 continue;
214 }
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100215 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 bh->b_bdev = vol->sb->s_bdev;
217 /* Is the block within the allowed limits? */
218 if (iblock < lblock) {
219 BOOL is_retry = FALSE;
220
221 /* Convert iblock into corresponding vcn and offset. */
222 vcn = (VCN)iblock << blocksize_bits >>
223 vol->cluster_size_bits;
224 vcn_ofs = ((VCN)iblock << blocksize_bits) &
225 vol->cluster_size_mask;
226 if (!rl) {
227lock_retry_remap:
228 down_read(&ni->runlist.lock);
229 rl = ni->runlist.rl;
230 }
231 if (likely(rl != NULL)) {
232 /* Seek to element containing target vcn. */
233 while (rl->length && rl[1].vcn <= vcn)
234 rl++;
235 lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
236 } else
237 lcn = LCN_RL_NOT_MAPPED;
238 /* Successful remap. */
239 if (lcn >= 0) {
240 /* Setup buffer head to correct block. */
241 bh->b_blocknr = ((lcn << vol->cluster_size_bits)
242 + vcn_ofs) >> blocksize_bits;
243 set_buffer_mapped(bh);
244 /* Only read initialized data blocks. */
245 if (iblock < zblock) {
246 arr[nr++] = bh;
247 continue;
248 }
249 /* Fully non-initialized data block, zero it. */
250 goto handle_zblock;
251 }
252 /* It is a hole, need to zero it. */
253 if (lcn == LCN_HOLE)
254 goto handle_hole;
255 /* If first try and runlist unmapped, map and retry. */
256 if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 is_retry = TRUE;
258 /*
259 * Attempt to map runlist, dropping lock for
260 * the duration.
261 */
262 up_read(&ni->runlist.lock);
263 err = ntfs_map_runlist(ni, vcn);
264 if (likely(!err))
265 goto lock_retry_remap;
266 rl = NULL;
Anton Altaparmakov9f993fe2005-06-25 16:15:36 +0100267 } else if (!rl)
268 up_read(&ni->runlist.lock);
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100269 /*
270 * If buffer is outside the runlist, treat it as a
271 * hole. This can happen due to concurrent truncate
272 * for example.
273 */
274 if (err == -ENOENT || lcn == LCN_ENOENT) {
275 err = 0;
276 goto handle_hole;
277 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 /* Hard error, zero out region. */
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100279 if (!err)
280 err = -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 bh->b_blocknr = -1;
282 SetPageError(page);
283 ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
284 "attribute type 0x%x, vcn 0x%llx, "
285 "offset 0x%x because its location on "
286 "disk could not be determined%s "
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100287 "(error code %i).", ni->mft_no,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 ni->type, (unsigned long long)vcn,
289 vcn_ofs, is_retry ? " even after "
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100290 "retrying" : "", err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 }
292 /*
293 * Either iblock was outside lblock limits or
294 * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
295 * of the page and set the buffer uptodate.
296 */
297handle_hole:
298 bh->b_blocknr = -1UL;
299 clear_buffer_mapped(bh);
300handle_zblock:
301 kaddr = kmap_atomic(page, KM_USER0);
302 memset(kaddr + i * blocksize, 0, blocksize);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 kunmap_atomic(kaddr, KM_USER0);
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100304 flush_dcache_page(page);
305 if (likely(!err))
306 set_buffer_uptodate(bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 } while (i++, iblock++, (bh = bh->b_this_page) != head);
308
309 /* Release the lock if we took it. */
310 if (rl)
311 up_read(&ni->runlist.lock);
312
313 /* Check we have at least one buffer ready for i/o. */
314 if (nr) {
315 struct buffer_head *tbh;
316
317 /* Lock the buffers. */
318 for (i = 0; i < nr; i++) {
319 tbh = arr[i];
320 lock_buffer(tbh);
321 tbh->b_end_io = ntfs_end_buffer_async_read;
322 set_buffer_async_read(tbh);
323 }
324 /* Finally, start i/o on the buffers. */
325 for (i = 0; i < nr; i++) {
326 tbh = arr[i];
327 if (likely(!buffer_uptodate(tbh)))
328 submit_bh(READ, tbh);
329 else
330 ntfs_end_buffer_async_read(tbh, 1);
331 }
332 return 0;
333 }
334 /* No i/o was scheduled on any of the buffers. */
335 if (likely(!PageError(page)))
336 SetPageUptodate(page);
337 else /* Signal synchronous i/o error. */
338 nr = -EIO;
339 unlock_page(page);
340 return nr;
341}
342
343/**
344 * ntfs_readpage - fill a @page of a @file with data from the device
345 * @file: open file to which the page @page belongs or NULL
346 * @page: page cache page to fill with data
347 *
348 * For non-resident attributes, ntfs_readpage() fills the @page of the open
349 * file @file by calling the ntfs version of the generic block_read_full_page()
350 * function, ntfs_read_block(), which in turn creates and reads in the buffers
351 * associated with the page asynchronously.
352 *
353 * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
354 * data from the mft record (which at this stage is most likely in memory) and
355 * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
356 * even if the mft record is not cached at this point in time, we need to wait
357 * for it to be read in before we can do the copy.
358 *
359 * Return 0 on success and -errno on error.
360 */
361static int ntfs_readpage(struct file *file, struct page *page)
362{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 ntfs_inode *ni, *base_ni;
364 u8 *kaddr;
365 ntfs_attr_search_ctx *ctx;
366 MFT_RECORD *mrec;
Anton Altaparmakovb6ad6c52005-02-15 10:08:43 +0000367 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 u32 attr_len;
369 int err = 0;
370
Anton Altaparmakov905685f2005-03-10 11:06:19 +0000371retry_readpage:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372 BUG_ON(!PageLocked(page));
373 /*
374 * This can potentially happen because we clear PageUptodate() during
375 * ntfs_writepage() of MstProtected() attributes.
376 */
377 if (PageUptodate(page)) {
378 unlock_page(page);
379 return 0;
380 }
381 ni = NTFS_I(page->mapping->host);
382
383 /* NInoNonResident() == NInoIndexAllocPresent() */
384 if (NInoNonResident(ni)) {
385 /*
386 * Only unnamed $DATA attributes can be compressed or
387 * encrypted.
388 */
389 if (ni->type == AT_DATA && !ni->name_len) {
390 /* If file is encrypted, deny access, just like NT4. */
391 if (NInoEncrypted(ni)) {
392 err = -EACCES;
393 goto err_out;
394 }
395 /* Compressed data streams are handled in compress.c. */
396 if (NInoCompressed(ni))
397 return ntfs_read_compressed_block(page);
398 }
399 /* Normal data stream. */
400 return ntfs_read_block(page);
401 }
402 /*
403 * Attribute is resident, implying it is not compressed or encrypted.
404 * This also means the attribute is smaller than an mft record and
405 * hence smaller than a page, so can simply zero out any pages with
Anton Altaparmakovb6ad6c52005-02-15 10:08:43 +0000406 * index above 0.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 */
Anton Altaparmakovb6ad6c52005-02-15 10:08:43 +0000408 if (unlikely(page->index > 0)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 kaddr = kmap_atomic(page, KM_USER0);
410 memset(kaddr, 0, PAGE_CACHE_SIZE);
411 flush_dcache_page(page);
412 kunmap_atomic(kaddr, KM_USER0);
413 goto done;
414 }
415 if (!NInoAttr(ni))
416 base_ni = ni;
417 else
418 base_ni = ni->ext.base_ntfs_ino;
419 /* Map, pin, and lock the mft record. */
420 mrec = map_mft_record(base_ni);
421 if (IS_ERR(mrec)) {
422 err = PTR_ERR(mrec);
423 goto err_out;
424 }
Anton Altaparmakov905685f2005-03-10 11:06:19 +0000425 /*
426 * If a parallel write made the attribute non-resident, drop the mft
427 * record and retry the readpage.
428 */
429 if (unlikely(NInoNonResident(ni))) {
430 unmap_mft_record(base_ni);
431 goto retry_readpage;
432 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
434 if (unlikely(!ctx)) {
435 err = -ENOMEM;
436 goto unm_err_out;
437 }
438 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
439 CASE_SENSITIVE, 0, NULL, 0, ctx);
440 if (unlikely(err))
441 goto put_unm_err_out;
442 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
Anton Altaparmakovb6ad6c52005-02-15 10:08:43 +0000443 read_lock_irqsave(&ni->size_lock, flags);
444 if (unlikely(attr_len > ni->initialized_size))
445 attr_len = ni->initialized_size;
446 read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 kaddr = kmap_atomic(page, KM_USER0);
448 /* Copy the data to the page. */
449 memcpy(kaddr, (u8*)ctx->attr +
450 le16_to_cpu(ctx->attr->data.resident.value_offset),
451 attr_len);
452 /* Zero the remainder of the page. */
453 memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
454 flush_dcache_page(page);
455 kunmap_atomic(kaddr, KM_USER0);
456put_unm_err_out:
457 ntfs_attr_put_search_ctx(ctx);
458unm_err_out:
459 unmap_mft_record(base_ni);
460done:
461 SetPageUptodate(page);
462err_out:
463 unlock_page(page);
464 return err;
465}
466
467#ifdef NTFS_RW
468
469/**
470 * ntfs_write_block - write a @page to the backing store
471 * @page: page cache page to write out
472 * @wbc: writeback control structure
473 *
474 * This function is for writing pages belonging to non-resident, non-mst
475 * protected attributes to their backing store.
476 *
477 * For a page with buffers, map and write the dirty buffers asynchronously
478 * under page writeback. For a page without buffers, create buffers for the
479 * page, then proceed as above.
480 *
481 * If a page doesn't have buffers the page dirty state is definitive. If a page
482 * does have buffers, the page dirty state is just a hint, and the buffer dirty
483 * state is definitive. (A hint which has rules: dirty buffers against a clean
484 * page is illegal. Other combinations are legal and need to be handled. In
485 * particular a dirty page containing clean buffers for example.)
486 *
487 * Return 0 on success and -errno on error.
488 *
489 * Based on ntfs_read_block() and __block_write_full_page().
490 */
491static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
492{
493 VCN vcn;
494 LCN lcn;
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000495 s64 initialized_size;
496 loff_t i_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497 sector_t block, dblock, iblock;
498 struct inode *vi;
499 ntfs_inode *ni;
500 ntfs_volume *vol;
501 runlist_element *rl;
502 struct buffer_head *bh, *head;
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000503 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 unsigned int blocksize, vcn_ofs;
505 int err;
506 BOOL need_end_writeback;
507 unsigned char blocksize_bits;
508
509 vi = page->mapping->host;
510 ni = NTFS_I(vi);
511 vol = ni->vol;
512
513 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
514 "0x%lx.", ni->mft_no, ni->type, page->index);
515
516 BUG_ON(!NInoNonResident(ni));
517 BUG_ON(NInoMstProtected(ni));
518
519 blocksize_bits = vi->i_blkbits;
520 blocksize = 1 << blocksize_bits;
521
522 if (!page_has_buffers(page)) {
523 BUG_ON(!PageUptodate(page));
524 create_empty_buffers(page, blocksize,
525 (1 << BH_Uptodate) | (1 << BH_Dirty));
526 }
527 bh = head = page_buffers(page);
528 if (unlikely(!bh)) {
529 ntfs_warning(vol->sb, "Error allocating page buffers. "
530 "Redirtying page so we try again later.");
531 /*
532 * Put the page back on mapping->dirty_pages, but leave its
533 * buffer's dirty state as-is.
534 */
535 redirty_page_for_writepage(wbc, page);
536 unlock_page(page);
537 return 0;
538 }
539
540 /* NOTE: Different naming scheme to ntfs_read_block()! */
541
542 /* The first block in the page. */
543 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
544
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000545 read_lock_irqsave(&ni->size_lock, flags);
546 i_size = i_size_read(vi);
547 initialized_size = ni->initialized_size;
548 read_unlock_irqrestore(&ni->size_lock, flags);
549
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550 /* The first out of bounds block for the data size. */
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000551 dblock = (i_size + blocksize - 1) >> blocksize_bits;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552
553 /* The last (fully or partially) initialized block. */
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000554 iblock = initialized_size >> blocksize_bits;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555
556 /*
557 * Be very careful. We have no exclusion from __set_page_dirty_buffers
558 * here, and the (potentially unmapped) buffers may become dirty at
559 * any time. If a buffer becomes dirty here after we've inspected it
560 * then we just miss that fact, and the page stays dirty.
561 *
562 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
563 * handle that here by just cleaning them.
564 */
565
566 /*
567 * Loop through all the buffers in the page, mapping all the dirty
568 * buffers to disk addresses and handling any aliases from the
569 * underlying block device's mapping.
570 */
571 rl = NULL;
572 err = 0;
573 do {
574 BOOL is_retry = FALSE;
575
576 if (unlikely(block >= dblock)) {
577 /*
578 * Mapped buffers outside i_size will occur, because
579 * this page can be outside i_size when there is a
580 * truncate in progress. The contents of such buffers
581 * were zeroed by ntfs_writepage().
582 *
583 * FIXME: What about the small race window where
584 * ntfs_writepage() has not done any clearing because
585 * the page was within i_size but before we get here,
586 * vmtruncate() modifies i_size?
587 */
588 clear_buffer_dirty(bh);
589 set_buffer_uptodate(bh);
590 continue;
591 }
592
593 /* Clean buffers are not written out, so no need to map them. */
594 if (!buffer_dirty(bh))
595 continue;
596
597 /* Make sure we have enough initialized size. */
598 if (unlikely((block >= iblock) &&
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000599 (initialized_size < i_size))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 /*
601 * If this page is fully outside initialized size, zero
602 * out all pages between the current initialized size
603 * and the current page. Just use ntfs_readpage() to do
604 * the zeroing transparently.
605 */
606 if (block > iblock) {
607 // TODO:
608 // For each page do:
609 // - read_cache_page()
610 // Again for each page do:
611 // - wait_on_page_locked()
612 // - Check (PageUptodate(page) &&
613 // !PageError(page))
614 // Update initialized size in the attribute and
615 // in the inode.
616 // Again, for each page do:
617 // __set_page_dirty_buffers();
618 // page_cache_release()
619 // We don't need to wait on the writes.
620 // Update iblock.
621 }
622 /*
623 * The current page straddles initialized size. Zero
624 * all non-uptodate buffers and set them uptodate (and
625 * dirty?). Note, there aren't any non-uptodate buffers
626 * if the page is uptodate.
627 * FIXME: For an uptodate page, the buffers may need to
628 * be written out because they were not initialized on
629 * disk before.
630 */
631 if (!PageUptodate(page)) {
632 // TODO:
633 // Zero any non-uptodate buffers up to i_size.
634 // Set them uptodate and dirty.
635 }
636 // TODO:
637 // Update initialized size in the attribute and in the
638 // inode (up to i_size).
639 // Update iblock.
640 // FIXME: This is inefficient. Try to batch the two
641 // size changes to happen in one go.
642 ntfs_error(vol->sb, "Writing beyond initialized size "
643 "is not supported yet. Sorry.");
644 err = -EOPNOTSUPP;
645 break;
646 // Do NOT set_buffer_new() BUT DO clear buffer range
647 // outside write request range.
648 // set_buffer_uptodate() on complete buffers as well as
649 // set_buffer_dirty().
650 }
651
652 /* No need to map buffers that are already mapped. */
653 if (buffer_mapped(bh))
654 continue;
655
656 /* Unmapped, dirty buffer. Need to map it. */
657 bh->b_bdev = vol->sb->s_bdev;
658
659 /* Convert block into corresponding vcn and offset. */
660 vcn = (VCN)block << blocksize_bits;
661 vcn_ofs = vcn & vol->cluster_size_mask;
662 vcn >>= vol->cluster_size_bits;
663 if (!rl) {
664lock_retry_remap:
665 down_read(&ni->runlist.lock);
666 rl = ni->runlist.rl;
667 }
668 if (likely(rl != NULL)) {
669 /* Seek to element containing target vcn. */
670 while (rl->length && rl[1].vcn <= vcn)
671 rl++;
672 lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
673 } else
674 lcn = LCN_RL_NOT_MAPPED;
675 /* Successful remap. */
676 if (lcn >= 0) {
677 /* Setup buffer head to point to correct block. */
678 bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
679 vcn_ofs) >> blocksize_bits;
680 set_buffer_mapped(bh);
681 continue;
682 }
683 /* It is a hole, need to instantiate it. */
684 if (lcn == LCN_HOLE) {
Anton Altaparmakov8dcdeba2005-09-08 21:25:48 +0100685 u8 *kaddr;
686 unsigned long *bpos, *bend;
687
688 /* Check if the buffer is zero. */
689 kaddr = kmap_atomic(page, KM_USER0);
690 bpos = (unsigned long *)(kaddr + bh_offset(bh));
691 bend = (unsigned long *)((u8*)bpos + blocksize);
692 do {
693 if (unlikely(*bpos))
694 break;
695 } while (likely(++bpos < bend));
696 kunmap_atomic(kaddr, KM_USER0);
697 if (bpos == bend) {
698 /*
699 * Buffer is zero and sparse, no need to write
700 * it.
701 */
702 bh->b_blocknr = -1;
703 clear_buffer_dirty(bh);
704 continue;
705 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 // TODO: Instantiate the hole.
707 // clear_buffer_new(bh);
708 // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
709 ntfs_error(vol->sb, "Writing into sparse regions is "
710 "not supported yet. Sorry.");
711 err = -EOPNOTSUPP;
712 break;
713 }
714 /* If first try and runlist unmapped, map and retry. */
715 if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
716 is_retry = TRUE;
717 /*
718 * Attempt to map runlist, dropping lock for
719 * the duration.
720 */
721 up_read(&ni->runlist.lock);
722 err = ntfs_map_runlist(ni, vcn);
723 if (likely(!err))
724 goto lock_retry_remap;
725 rl = NULL;
Anton Altaparmakov9f993fe2005-06-25 16:15:36 +0100726 } else if (!rl)
727 up_read(&ni->runlist.lock);
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100728 /*
729 * If buffer is outside the runlist, truncate has cut it out
730 * of the runlist. Just clean and clear the buffer and set it
731 * uptodate so it can get discarded by the VM.
732 */
733 if (err == -ENOENT || lcn == LCN_ENOENT) {
734 u8 *kaddr;
735
736 bh->b_blocknr = -1;
737 clear_buffer_dirty(bh);
738 kaddr = kmap_atomic(page, KM_USER0);
739 memset(kaddr + bh_offset(bh), 0, blocksize);
740 kunmap_atomic(kaddr, KM_USER0);
741 flush_dcache_page(page);
742 set_buffer_uptodate(bh);
743 err = 0;
744 continue;
745 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 /* Failed to map the buffer, even after retrying. */
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100747 if (!err)
748 err = -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749 bh->b_blocknr = -1;
750 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
751 "attribute type 0x%x, vcn 0x%llx, offset 0x%x "
752 "because its location on disk could not be "
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100753 "determined%s (error code %i).", ni->mft_no,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 ni->type, (unsigned long long)vcn,
755 vcn_ofs, is_retry ? " even after "
Anton Altaparmakov8273d5d2005-09-08 22:00:33 +0100756 "retrying" : "", err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 break;
758 } while (block++, (bh = bh->b_this_page) != head);
759
760 /* Release the lock if we took it. */
761 if (rl)
762 up_read(&ni->runlist.lock);
763
764 /* For the error case, need to reset bh to the beginning. */
765 bh = head;
766
Anton Altaparmakov54b02eb02005-09-08 21:43:47 +0100767 /* Just an optimization, so ->readpage() is not called later. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 if (unlikely(!PageUptodate(page))) {
769 int uptodate = 1;
770 do {
771 if (!buffer_uptodate(bh)) {
772 uptodate = 0;
773 bh = head;
774 break;
775 }
776 } while ((bh = bh->b_this_page) != head);
777 if (uptodate)
778 SetPageUptodate(page);
779 }
780
781 /* Setup all mapped, dirty buffers for async write i/o. */
782 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 if (buffer_mapped(bh) && buffer_dirty(bh)) {
784 lock_buffer(bh);
785 if (test_clear_buffer_dirty(bh)) {
786 BUG_ON(!buffer_uptodate(bh));
787 mark_buffer_async_write(bh);
788 } else
789 unlock_buffer(bh);
790 } else if (unlikely(err)) {
791 /*
792 * For the error case. The buffer may have been set
793 * dirty during attachment to a dirty page.
794 */
795 if (err != -ENOMEM)
796 clear_buffer_dirty(bh);
797 }
798 } while ((bh = bh->b_this_page) != head);
799
800 if (unlikely(err)) {
801 // TODO: Remove the -EOPNOTSUPP check later on...
802 if (unlikely(err == -EOPNOTSUPP))
803 err = 0;
804 else if (err == -ENOMEM) {
805 ntfs_warning(vol->sb, "Error allocating memory. "
806 "Redirtying page so we try again "
807 "later.");
808 /*
809 * Put the page back on mapping->dirty_pages, but
810 * leave its buffer's dirty state as-is.
811 */
812 redirty_page_for_writepage(wbc, page);
813 err = 0;
814 } else
815 SetPageError(page);
816 }
817
818 BUG_ON(PageWriteback(page));
819 set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820
Anton Altaparmakov54b02eb02005-09-08 21:43:47 +0100821 /* Submit the prepared buffers for i/o. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 need_end_writeback = TRUE;
823 do {
824 struct buffer_head *next = bh->b_this_page;
825 if (buffer_async_write(bh)) {
826 submit_bh(WRITE, bh);
827 need_end_writeback = FALSE;
828 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829 bh = next;
830 } while (bh != head);
Anton Altaparmakov54b02eb02005-09-08 21:43:47 +0100831 unlock_page(page);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832
833 /* If no i/o was started, need to end_page_writeback(). */
834 if (unlikely(need_end_writeback))
835 end_page_writeback(page);
836
837 ntfs_debug("Done.");
838 return err;
839}
840
841/**
842 * ntfs_write_mst_block - write a @page to the backing store
843 * @page: page cache page to write out
844 * @wbc: writeback control structure
845 *
846 * This function is for writing pages belonging to non-resident, mst protected
847 * attributes to their backing store. The only supported attributes are index
848 * allocation and $MFT/$DATA. Both directory inodes and index inodes are
849 * supported for the index allocation case.
850 *
851 * The page must remain locked for the duration of the write because we apply
852 * the mst fixups, write, and then undo the fixups, so if we were to unlock the
853 * page before undoing the fixups, any other user of the page will see the
854 * page contents as corrupt.
855 *
856 * We clear the page uptodate flag for the duration of the function to ensure
857 * exclusion for the $MFT/$DATA case against someone mapping an mft record we
858 * are about to apply the mst fixups to.
859 *
860 * Return 0 on success and -errno on error.
861 *
862 * Based on ntfs_write_block(), ntfs_mft_writepage(), and
863 * write_mft_record_nolock().
864 */
865static int ntfs_write_mst_block(struct page *page,
866 struct writeback_control *wbc)
867{
868 sector_t block, dblock, rec_block;
869 struct inode *vi = page->mapping->host;
870 ntfs_inode *ni = NTFS_I(vi);
871 ntfs_volume *vol = ni->vol;
872 u8 *kaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873 unsigned int rec_size = ni->itype.index.block_size;
874 ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
875 struct buffer_head *bh, *head, *tbh, *rec_start_bh;
Anton Altaparmakovd53ee322005-04-06 16:11:20 +0100876 struct buffer_head *bhs[MAX_BUF_PER_PAGE];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 runlist_element *rl;
Anton Altaparmakovd53ee322005-04-06 16:11:20 +0100878 int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
879 unsigned bh_size, rec_size_bits;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
Anton Altaparmakovd53ee322005-04-06 16:11:20 +0100881 unsigned char bh_size_bits;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882
883 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
884 "0x%lx.", vi->i_ino, ni->type, page->index);
885 BUG_ON(!NInoNonResident(ni));
886 BUG_ON(!NInoMstProtected(ni));
887 is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
888 /*
889 * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
890 * in its page cache were to be marked dirty. However this should
891 * never happen with the current driver and considering we do not
892 * handle this case here we do want to BUG(), at least for now.
893 */
894 BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
895 (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
Anton Altaparmakovd53ee322005-04-06 16:11:20 +0100896 bh_size_bits = vi->i_blkbits;
897 bh_size = 1 << bh_size_bits;
898 max_bhs = PAGE_CACHE_SIZE / bh_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 BUG_ON(!max_bhs);
Anton Altaparmakovd53ee322005-04-06 16:11:20 +0100900 BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901
902 /* Were we called for sync purposes? */
903 sync = (wbc->sync_mode == WB_SYNC_ALL);
904
905 /* Make sure we have mapped buffers. */
906 BUG_ON(!page_has_buffers(page));
907 bh = head = page_buffers(page);
908 BUG_ON(!bh);
909
910 rec_size_bits = ni->itype.index.block_size_bits;
911 BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
912 bhs_per_rec = rec_size >> bh_size_bits;
913 BUG_ON(!bhs_per_rec);
914
915 /* The first block in the page. */
916 rec_block = block = (sector_t)page->index <<
917 (PAGE_CACHE_SHIFT - bh_size_bits);
918
919 /* The first out of bounds block for the data size. */
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +0000920 dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921
922 rl = NULL;
923 err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
924 page_is_dirty = rec_is_dirty = FALSE;
925 rec_start_bh = NULL;
926 do {
927 BOOL is_retry = FALSE;
928
929 if (likely(block < rec_block)) {
930 if (unlikely(block >= dblock)) {
931 clear_buffer_dirty(bh);
Anton Altaparmakov946929d2005-01-13 15:26:29 +0000932 set_buffer_uptodate(bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 continue;
934 }
935 /*
936 * This block is not the first one in the record. We
937 * ignore the buffer's dirty state because we could
938 * have raced with a parallel mark_ntfs_record_dirty().
939 */
940 if (!rec_is_dirty)
941 continue;
942 if (unlikely(err2)) {
943 if (err2 != -ENOMEM)
944 clear_buffer_dirty(bh);
945 continue;
946 }
947 } else /* if (block == rec_block) */ {
948 BUG_ON(block > rec_block);
949 /* This block is the first one in the record. */
950 rec_block += bhs_per_rec;
951 err2 = 0;
952 if (unlikely(block >= dblock)) {
953 clear_buffer_dirty(bh);
954 continue;
955 }
956 if (!buffer_dirty(bh)) {
957 /* Clean records are not written out. */
958 rec_is_dirty = FALSE;
959 continue;
960 }
961 rec_is_dirty = TRUE;
962 rec_start_bh = bh;
963 }
964 /* Need to map the buffer if it is not mapped already. */
965 if (unlikely(!buffer_mapped(bh))) {
966 VCN vcn;
967 LCN lcn;
968 unsigned int vcn_ofs;
969
Anton Altaparmakov481d0372005-08-16 19:42:56 +0100970 bh->b_bdev = vol->sb->s_bdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 /* Obtain the vcn and offset of the current block. */
972 vcn = (VCN)block << bh_size_bits;
973 vcn_ofs = vcn & vol->cluster_size_mask;
974 vcn >>= vol->cluster_size_bits;
975 if (!rl) {
976lock_retry_remap:
977 down_read(&ni->runlist.lock);
978 rl = ni->runlist.rl;
979 }
980 if (likely(rl != NULL)) {
981 /* Seek to element containing target vcn. */
982 while (rl->length && rl[1].vcn <= vcn)
983 rl++;
984 lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
985 } else
986 lcn = LCN_RL_NOT_MAPPED;
987 /* Successful remap. */
988 if (likely(lcn >= 0)) {
989 /* Setup buffer head to correct block. */
990 bh->b_blocknr = ((lcn <<
991 vol->cluster_size_bits) +
992 vcn_ofs) >> bh_size_bits;
993 set_buffer_mapped(bh);
994 } else {
995 /*
996 * Remap failed. Retry to map the runlist once
997 * unless we are working on $MFT which always
998 * has the whole of its runlist in memory.
999 */
1000 if (!is_mft && !is_retry &&
1001 lcn == LCN_RL_NOT_MAPPED) {
1002 is_retry = TRUE;
1003 /*
1004 * Attempt to map runlist, dropping
1005 * lock for the duration.
1006 */
1007 up_read(&ni->runlist.lock);
1008 err2 = ntfs_map_runlist(ni, vcn);
1009 if (likely(!err2))
1010 goto lock_retry_remap;
1011 if (err2 == -ENOMEM)
1012 page_is_dirty = TRUE;
1013 lcn = err2;
Anton Altaparmakov9f993fe2005-06-25 16:15:36 +01001014 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 err2 = -EIO;
Anton Altaparmakov9f993fe2005-06-25 16:15:36 +01001016 if (!rl)
1017 up_read(&ni->runlist.lock);
1018 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 /* Hard error. Abort writing this record. */
1020 if (!err || err == -ENOMEM)
1021 err = err2;
1022 bh->b_blocknr = -1;
1023 ntfs_error(vol->sb, "Cannot write ntfs record "
1024 "0x%llx (inode 0x%lx, "
1025 "attribute type 0x%x) because "
1026 "its location on disk could "
1027 "not be determined (error "
Randy Dunlap8907547d2005-03-03 11:19:53 +00001028 "code %lli).",
1029 (long long)block <<
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 bh_size_bits >>
1031 vol->mft_record_size_bits,
1032 ni->mft_no, ni->type,
1033 (long long)lcn);
1034 /*
1035 * If this is not the first buffer, remove the
1036 * buffers in this record from the list of
1037 * buffers to write and clear their dirty bit
1038 * if not error -ENOMEM.
1039 */
1040 if (rec_start_bh != bh) {
1041 while (bhs[--nr_bhs] != rec_start_bh)
1042 ;
1043 if (err2 != -ENOMEM) {
1044 do {
1045 clear_buffer_dirty(
1046 rec_start_bh);
1047 } while ((rec_start_bh =
1048 rec_start_bh->
1049 b_this_page) !=
1050 bh);
1051 }
1052 }
1053 continue;
1054 }
1055 }
1056 BUG_ON(!buffer_uptodate(bh));
1057 BUG_ON(nr_bhs >= max_bhs);
1058 bhs[nr_bhs++] = bh;
1059 } while (block++, (bh = bh->b_this_page) != head);
1060 if (unlikely(rl))
1061 up_read(&ni->runlist.lock);
1062 /* If there were no dirty buffers, we are done. */
1063 if (!nr_bhs)
1064 goto done;
1065 /* Map the page so we can access its contents. */
1066 kaddr = kmap(page);
1067 /* Clear the page uptodate flag whilst the mst fixups are applied. */
1068 BUG_ON(!PageUptodate(page));
1069 ClearPageUptodate(page);
1070 for (i = 0; i < nr_bhs; i++) {
1071 unsigned int ofs;
1072
1073 /* Skip buffers which are not at the beginning of records. */
1074 if (i % bhs_per_rec)
1075 continue;
1076 tbh = bhs[i];
1077 ofs = bh_offset(tbh);
1078 if (is_mft) {
1079 ntfs_inode *tni;
1080 unsigned long mft_no;
1081
1082 /* Get the mft record number. */
1083 mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
1084 >> rec_size_bits;
1085 /* Check whether to write this mft record. */
1086 tni = NULL;
1087 if (!ntfs_may_write_mft_record(vol, mft_no,
1088 (MFT_RECORD*)(kaddr + ofs), &tni)) {
1089 /*
1090 * The record should not be written. This
1091 * means we need to redirty the page before
1092 * returning.
1093 */
1094 page_is_dirty = TRUE;
1095 /*
1096 * Remove the buffers in this mft record from
1097 * the list of buffers to write.
1098 */
1099 do {
1100 bhs[i] = NULL;
1101 } while (++i % bhs_per_rec);
1102 continue;
1103 }
1104 /*
1105 * The record should be written. If a locked ntfs
1106 * inode was returned, add it to the array of locked
1107 * ntfs inodes.
1108 */
1109 if (tni)
1110 locked_nis[nr_locked_nis++] = tni;
1111 }
1112 /* Apply the mst protection fixups. */
1113 err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
1114 rec_size);
1115 if (unlikely(err2)) {
1116 if (!err || err == -ENOMEM)
1117 err = -EIO;
1118 ntfs_error(vol->sb, "Failed to apply mst fixups "
1119 "(inode 0x%lx, attribute type 0x%x, "
1120 "page index 0x%lx, page offset 0x%x)!"
1121 " Unmount and run chkdsk.", vi->i_ino,
1122 ni->type, page->index, ofs);
1123 /*
1124 * Mark all the buffers in this record clean as we do
1125 * not want to write corrupt data to disk.
1126 */
1127 do {
1128 clear_buffer_dirty(bhs[i]);
1129 bhs[i] = NULL;
1130 } while (++i % bhs_per_rec);
1131 continue;
1132 }
1133 nr_recs++;
1134 }
1135 /* If no records are to be written out, we are done. */
1136 if (!nr_recs)
1137 goto unm_done;
1138 flush_dcache_page(page);
1139 /* Lock buffers and start synchronous write i/o on them. */
1140 for (i = 0; i < nr_bhs; i++) {
1141 tbh = bhs[i];
1142 if (!tbh)
1143 continue;
1144 if (unlikely(test_set_buffer_locked(tbh)))
1145 BUG();
1146 /* The buffer dirty state is now irrelevant, just clean it. */
1147 clear_buffer_dirty(tbh);
1148 BUG_ON(!buffer_uptodate(tbh));
1149 BUG_ON(!buffer_mapped(tbh));
1150 get_bh(tbh);
1151 tbh->b_end_io = end_buffer_write_sync;
1152 submit_bh(WRITE, tbh);
1153 }
1154 /* Synchronize the mft mirror now if not @sync. */
1155 if (is_mft && !sync)
1156 goto do_mirror;
1157do_wait:
1158 /* Wait on i/o completion of buffers. */
1159 for (i = 0; i < nr_bhs; i++) {
1160 tbh = bhs[i];
1161 if (!tbh)
1162 continue;
1163 wait_on_buffer(tbh);
1164 if (unlikely(!buffer_uptodate(tbh))) {
1165 ntfs_error(vol->sb, "I/O error while writing ntfs "
1166 "record buffer (inode 0x%lx, "
1167 "attribute type 0x%x, page index "
1168 "0x%lx, page offset 0x%lx)! Unmount "
1169 "and run chkdsk.", vi->i_ino, ni->type,
1170 page->index, bh_offset(tbh));
1171 if (!err || err == -ENOMEM)
1172 err = -EIO;
1173 /*
1174 * Set the buffer uptodate so the page and buffer
1175 * states do not become out of sync.
1176 */
1177 set_buffer_uptodate(tbh);
1178 }
1179 }
1180 /* If @sync, now synchronize the mft mirror. */
1181 if (is_mft && sync) {
1182do_mirror:
1183 for (i = 0; i < nr_bhs; i++) {
1184 unsigned long mft_no;
1185 unsigned int ofs;
1186
1187 /*
1188 * Skip buffers which are not at the beginning of
1189 * records.
1190 */
1191 if (i % bhs_per_rec)
1192 continue;
1193 tbh = bhs[i];
1194 /* Skip removed buffers (and hence records). */
1195 if (!tbh)
1196 continue;
1197 ofs = bh_offset(tbh);
1198 /* Get the mft record number. */
1199 mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
1200 >> rec_size_bits;
1201 if (mft_no < vol->mftmirr_size)
1202 ntfs_sync_mft_mirror(vol, mft_no,
1203 (MFT_RECORD*)(kaddr + ofs),
1204 sync);
1205 }
1206 if (!sync)
1207 goto do_wait;
1208 }
1209 /* Remove the mst protection fixups again. */
1210 for (i = 0; i < nr_bhs; i++) {
1211 if (!(i % bhs_per_rec)) {
1212 tbh = bhs[i];
1213 if (!tbh)
1214 continue;
1215 post_write_mst_fixup((NTFS_RECORD*)(kaddr +
1216 bh_offset(tbh)));
1217 }
1218 }
1219 flush_dcache_page(page);
1220unm_done:
1221 /* Unlock any locked inodes. */
1222 while (nr_locked_nis-- > 0) {
1223 ntfs_inode *tni, *base_tni;
1224
1225 tni = locked_nis[nr_locked_nis];
1226 /* Get the base inode. */
1227 down(&tni->extent_lock);
1228 if (tni->nr_extents >= 0)
1229 base_tni = tni;
1230 else {
1231 base_tni = tni->ext.base_ntfs_ino;
1232 BUG_ON(!base_tni);
1233 }
1234 up(&tni->extent_lock);
1235 ntfs_debug("Unlocking %s inode 0x%lx.",
1236 tni == base_tni ? "base" : "extent",
1237 tni->mft_no);
1238 up(&tni->mrec_lock);
1239 atomic_dec(&tni->count);
1240 iput(VFS_I(base_tni));
1241 }
1242 SetPageUptodate(page);
1243 kunmap(page);
1244done:
1245 if (unlikely(err && err != -ENOMEM)) {
1246 /*
1247 * Set page error if there is only one ntfs record in the page.
1248 * Otherwise we would loose per-record granularity.
1249 */
1250 if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
1251 SetPageError(page);
1252 NVolSetErrors(vol);
1253 }
1254 if (page_is_dirty) {
1255 ntfs_debug("Page still contains one or more dirty ntfs "
1256 "records. Redirtying the page starting at "
1257 "record 0x%lx.", page->index <<
1258 (PAGE_CACHE_SHIFT - rec_size_bits));
1259 redirty_page_for_writepage(wbc, page);
1260 unlock_page(page);
1261 } else {
1262 /*
1263 * Keep the VM happy. This must be done otherwise the
1264 * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
1265 * the page is clean.
1266 */
1267 BUG_ON(PageWriteback(page));
1268 set_page_writeback(page);
1269 unlock_page(page);
1270 end_page_writeback(page);
1271 }
1272 if (likely(!err))
1273 ntfs_debug("Done.");
1274 return err;
1275}
1276
1277/**
1278 * ntfs_writepage - write a @page to the backing store
1279 * @page: page cache page to write out
1280 * @wbc: writeback control structure
1281 *
1282 * This is called from the VM when it wants to have a dirty ntfs page cache
1283 * page cleaned. The VM has already locked the page and marked it clean.
1284 *
1285 * For non-resident attributes, ntfs_writepage() writes the @page by calling
1286 * the ntfs version of the generic block_write_full_page() function,
1287 * ntfs_write_block(), which in turn if necessary creates and writes the
1288 * buffers associated with the page asynchronously.
1289 *
1290 * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
1291 * the data to the mft record (which at this stage is most likely in memory).
1292 * The mft record is then marked dirty and written out asynchronously via the
1293 * vfs inode dirty code path for the inode the mft record belongs to or via the
1294 * vm page dirty code path for the page the mft record is in.
1295 *
1296 * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
1297 *
1298 * Return 0 on success and -errno on error.
1299 */
1300static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1301{
1302 loff_t i_size;
Anton Altaparmakov149f0c52005-01-12 13:52:30 +00001303 struct inode *vi = page->mapping->host;
1304 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305 char *kaddr;
Anton Altaparmakov149f0c52005-01-12 13:52:30 +00001306 ntfs_attr_search_ctx *ctx = NULL;
1307 MFT_RECORD *m = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 u32 attr_len;
1309 int err;
1310
Anton Altaparmakov905685f2005-03-10 11:06:19 +00001311retry_writepage:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312 BUG_ON(!PageLocked(page));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 i_size = i_size_read(vi);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314 /* Is the page fully outside i_size? (truncate in progress) */
1315 if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
1316 PAGE_CACHE_SHIFT)) {
1317 /*
1318 * The page may have dirty, unmapped buffers. Make them
1319 * freeable here, so the page does not leak.
1320 */
1321 block_invalidatepage(page, 0);
1322 unlock_page(page);
1323 ntfs_debug("Write outside i_size - truncated?");
1324 return 0;
1325 }
Anton Altaparmakovbd45fdd2005-09-08 21:38:05 +01001326 /*
1327 * Only $DATA attributes can be encrypted and only unnamed $DATA
1328 * attributes can be compressed. Index root can have the flags set but
1329 * this means to create compressed/encrypted files, not that the
1330 * attribute is compressed/encrypted.
1331 */
1332 if (ni->type != AT_INDEX_ROOT) {
1333 /* If file is encrypted, deny access, just like NT4. */
1334 if (NInoEncrypted(ni)) {
1335 unlock_page(page);
1336 BUG_ON(ni->type != AT_DATA);
1337 ntfs_debug("Denying write access to encrypted "
1338 "file.");
1339 return -EACCES;
1340 }
1341 /* Compressed data streams are handled in compress.c. */
1342 if (NInoNonResident(ni) && NInoCompressed(ni)) {
1343 BUG_ON(ni->type != AT_DATA);
1344 BUG_ON(ni->name_len);
1345 // TODO: Implement and replace this with
1346 // return ntfs_write_compressed_block(page);
1347 unlock_page(page);
1348 ntfs_error(vi->i_sb, "Writing to compressed files is "
1349 "not supported yet. Sorry.");
1350 return -EOPNOTSUPP;
1351 }
1352 // TODO: Implement and remove this check.
1353 if (NInoNonResident(ni) && NInoSparse(ni)) {
1354 unlock_page(page);
1355 ntfs_error(vi->i_sb, "Writing to sparse files is not "
1356 "supported yet. Sorry.");
1357 return -EOPNOTSUPP;
1358 }
1359 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 /* NInoNonResident() == NInoIndexAllocPresent() */
1361 if (NInoNonResident(ni)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 /* We have to zero every time due to mmap-at-end-of-file. */
1363 if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
1364 /* The page straddles i_size. */
1365 unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
1366 kaddr = kmap_atomic(page, KM_USER0);
1367 memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
1368 flush_dcache_page(page);
1369 kunmap_atomic(kaddr, KM_USER0);
1370 }
1371 /* Handle mst protected attributes. */
1372 if (NInoMstProtected(ni))
1373 return ntfs_write_mst_block(page, wbc);
Anton Altaparmakovbd45fdd2005-09-08 21:38:05 +01001374 /* Normal, non-resident data stream. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 return ntfs_write_block(page, wbc);
1376 }
1377 /*
Anton Altaparmakovbd45fdd2005-09-08 21:38:05 +01001378 * Attribute is resident, implying it is not compressed, encrypted, or
1379 * mst protected. This also means the attribute is smaller than an mft
1380 * record and hence smaller than a page, so can simply return error on
1381 * any pages with index above 0. Note the attribute can actually be
1382 * marked compressed but if it is resident the actual data is not
1383 * compressed so we are ok to ignore the compressed flag here.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 */
1385 BUG_ON(page_has_buffers(page));
1386 BUG_ON(!PageUptodate(page));
1387 if (unlikely(page->index > 0)) {
1388 ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
1389 "Aborting write.", page->index);
1390 BUG_ON(PageWriteback(page));
1391 set_page_writeback(page);
1392 unlock_page(page);
1393 end_page_writeback(page);
1394 return -EIO;
1395 }
1396 if (!NInoAttr(ni))
1397 base_ni = ni;
1398 else
1399 base_ni = ni->ext.base_ntfs_ino;
1400 /* Map, pin, and lock the mft record. */
1401 m = map_mft_record(base_ni);
1402 if (IS_ERR(m)) {
1403 err = PTR_ERR(m);
1404 m = NULL;
1405 ctx = NULL;
1406 goto err_out;
1407 }
Anton Altaparmakov905685f2005-03-10 11:06:19 +00001408 /*
1409 * If a parallel write made the attribute non-resident, drop the mft
1410 * record and retry the writepage.
1411 */
1412 if (unlikely(NInoNonResident(ni))) {
1413 unmap_mft_record(base_ni);
1414 goto retry_writepage;
1415 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416 ctx = ntfs_attr_get_search_ctx(base_ni, m);
1417 if (unlikely(!ctx)) {
1418 err = -ENOMEM;
1419 goto err_out;
1420 }
1421 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1422 CASE_SENSITIVE, 0, NULL, 0, ctx);
1423 if (unlikely(err))
1424 goto err_out;
1425 /*
1426 * Keep the VM happy. This must be done otherwise the radix-tree tag
1427 * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
1428 */
1429 BUG_ON(PageWriteback(page));
1430 set_page_writeback(page);
1431 unlock_page(page);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 /*
Anton Altaparmakovbd45fdd2005-09-08 21:38:05 +01001433 * Here, we do not need to zero the out of bounds area everytime
1434 * because the below memcpy() already takes care of the
1435 * mmap-at-end-of-file requirements. If the file is converted to a
1436 * non-resident one, then the code path use is switched to the
1437 * non-resident one where the zeroing happens on each ntfs_writepage()
1438 * invocation.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001441 i_size = i_size_read(vi);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442 if (unlikely(attr_len > i_size)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 attr_len = i_size;
Anton Altaparmakovf40661b2005-01-13 16:03:38 +00001444 ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445 }
Anton Altaparmakovf40661b2005-01-13 16:03:38 +00001446 kaddr = kmap_atomic(page, KM_USER0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447 /* Copy the data from the page to the mft record. */
1448 memcpy((u8*)ctx->attr +
1449 le16_to_cpu(ctx->attr->data.resident.value_offset),
1450 kaddr, attr_len);
1451 flush_dcache_mft_record_page(ctx->ntfs_ino);
1452 /* Zero out of bounds area in the page cache page. */
1453 memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
1454 flush_dcache_page(page);
1455 kunmap_atomic(kaddr, KM_USER0);
1456
1457 end_page_writeback(page);
1458
1459 /* Mark the mft record dirty, so it gets written back. */
1460 mark_mft_record_dirty(ctx->ntfs_ino);
1461 ntfs_attr_put_search_ctx(ctx);
1462 unmap_mft_record(base_ni);
1463 return 0;
1464err_out:
1465 if (err == -ENOMEM) {
1466 ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
1467 "page so we try again later.");
1468 /*
1469 * Put the page back on mapping->dirty_pages, but leave its
1470 * buffers' dirty state as-is.
1471 */
1472 redirty_page_for_writepage(wbc, page);
1473 err = 0;
1474 } else {
1475 ntfs_error(vi->i_sb, "Resident attribute write failed with "
Anton Altaparmakov149f0c52005-01-12 13:52:30 +00001476 "error %i.", err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 SetPageError(page);
Anton Altaparmakov149f0c52005-01-12 13:52:30 +00001478 NVolSetErrors(ni->vol);
1479 make_bad_inode(vi);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 }
1481 unlock_page(page);
1482 if (ctx)
1483 ntfs_attr_put_search_ctx(ctx);
1484 if (m)
1485 unmap_mft_record(base_ni);
1486 return err;
1487}
1488
1489/**
1490 * ntfs_prepare_nonresident_write -
1491 *
1492 */
1493static int ntfs_prepare_nonresident_write(struct page *page,
1494 unsigned from, unsigned to)
1495{
1496 VCN vcn;
1497 LCN lcn;
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001498 s64 initialized_size;
1499 loff_t i_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 sector_t block, ablock, iblock;
1501 struct inode *vi;
1502 ntfs_inode *ni;
1503 ntfs_volume *vol;
1504 runlist_element *rl;
1505 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001506 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 unsigned int vcn_ofs, block_start, block_end, blocksize;
1508 int err;
1509 BOOL is_retry;
1510 unsigned char blocksize_bits;
1511
1512 vi = page->mapping->host;
1513 ni = NTFS_I(vi);
1514 vol = ni->vol;
1515
1516 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
1517 "0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
1518 page->index, from, to);
1519
1520 BUG_ON(!NInoNonResident(ni));
1521
1522 blocksize_bits = vi->i_blkbits;
1523 blocksize = 1 << blocksize_bits;
1524
1525 /*
1526 * create_empty_buffers() will create uptodate/dirty buffers if the
1527 * page is uptodate/dirty.
1528 */
1529 if (!page_has_buffers(page))
1530 create_empty_buffers(page, blocksize, 0);
1531 bh = head = page_buffers(page);
1532 if (unlikely(!bh))
1533 return -ENOMEM;
1534
1535 /* The first block in the page. */
1536 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
1537
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001538 read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 /*
Anton Altaparmakovb6ad6c52005-02-15 10:08:43 +00001540 * The first out of bounds block for the allocated size. No need to
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541 * round up as allocated_size is in multiples of cluster size and the
1542 * minimum cluster size is 512 bytes, which is equal to the smallest
1543 * blocksize.
1544 */
1545 ablock = ni->allocated_size >> blocksize_bits;
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001546 i_size = i_size_read(vi);
1547 initialized_size = ni->initialized_size;
1548 read_unlock_irqrestore(&ni->size_lock, flags);
1549
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 /* The last (fully or partially) initialized block. */
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001551 iblock = initialized_size >> blocksize_bits;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552
1553 /* Loop through all the buffers in the page. */
1554 block_start = 0;
1555 rl = NULL;
1556 err = 0;
1557 do {
1558 block_end = block_start + blocksize;
1559 /*
1560 * If buffer @bh is outside the write, just mark it uptodate
1561 * if the page is uptodate and continue with the next buffer.
1562 */
1563 if (block_end <= from || block_start >= to) {
1564 if (PageUptodate(page)) {
1565 if (!buffer_uptodate(bh))
1566 set_buffer_uptodate(bh);
1567 }
1568 continue;
1569 }
1570 /*
1571 * @bh is at least partially being written to.
1572 * Make sure it is not marked as new.
1573 */
1574 //if (buffer_new(bh))
1575 // clear_buffer_new(bh);
1576
1577 if (block >= ablock) {
1578 // TODO: block is above allocated_size, need to
1579 // allocate it. Best done in one go to accommodate not
1580 // only block but all above blocks up to and including:
1581 // ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
1582 // - 1) >> blobksize_bits. Obviously will need to round
1583 // up to next cluster boundary, too. This should be
1584 // done with a helper function, so it can be reused.
1585 ntfs_error(vol->sb, "Writing beyond allocated size "
1586 "is not supported yet. Sorry.");
1587 err = -EOPNOTSUPP;
1588 goto err_out;
1589 // Need to update ablock.
1590 // Need to set_buffer_new() on all block bhs that are
1591 // newly allocated.
1592 }
1593 /*
1594 * Now we have enough allocated size to fulfill the whole
1595 * request, i.e. block < ablock is true.
1596 */
1597 if (unlikely((block >= iblock) &&
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001598 (initialized_size < i_size))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 /*
1600 * If this page is fully outside initialized size, zero
1601 * out all pages between the current initialized size
1602 * and the current page. Just use ntfs_readpage() to do
1603 * the zeroing transparently.
1604 */
1605 if (block > iblock) {
1606 // TODO:
1607 // For each page do:
1608 // - read_cache_page()
1609 // Again for each page do:
1610 // - wait_on_page_locked()
1611 // - Check (PageUptodate(page) &&
1612 // !PageError(page))
1613 // Update initialized size in the attribute and
1614 // in the inode.
1615 // Again, for each page do:
1616 // __set_page_dirty_buffers();
1617 // page_cache_release()
1618 // We don't need to wait on the writes.
1619 // Update iblock.
1620 }
1621 /*
1622 * The current page straddles initialized size. Zero
1623 * all non-uptodate buffers and set them uptodate (and
1624 * dirty?). Note, there aren't any non-uptodate buffers
1625 * if the page is uptodate.
1626 * FIXME: For an uptodate page, the buffers may need to
1627 * be written out because they were not initialized on
1628 * disk before.
1629 */
1630 if (!PageUptodate(page)) {
1631 // TODO:
1632 // Zero any non-uptodate buffers up to i_size.
1633 // Set them uptodate and dirty.
1634 }
1635 // TODO:
1636 // Update initialized size in the attribute and in the
1637 // inode (up to i_size).
1638 // Update iblock.
1639 // FIXME: This is inefficient. Try to batch the two
1640 // size changes to happen in one go.
1641 ntfs_error(vol->sb, "Writing beyond initialized size "
1642 "is not supported yet. Sorry.");
1643 err = -EOPNOTSUPP;
1644 goto err_out;
1645 // Do NOT set_buffer_new() BUT DO clear buffer range
1646 // outside write request range.
1647 // set_buffer_uptodate() on complete buffers as well as
1648 // set_buffer_dirty().
1649 }
1650
1651 /* Need to map unmapped buffers. */
1652 if (!buffer_mapped(bh)) {
1653 /* Unmapped buffer. Need to map it. */
1654 bh->b_bdev = vol->sb->s_bdev;
1655
1656 /* Convert block into corresponding vcn and offset. */
1657 vcn = (VCN)block << blocksize_bits >>
1658 vol->cluster_size_bits;
1659 vcn_ofs = ((VCN)block << blocksize_bits) &
1660 vol->cluster_size_mask;
1661
1662 is_retry = FALSE;
1663 if (!rl) {
1664lock_retry_remap:
1665 down_read(&ni->runlist.lock);
1666 rl = ni->runlist.rl;
1667 }
1668 if (likely(rl != NULL)) {
1669 /* Seek to element containing target vcn. */
1670 while (rl->length && rl[1].vcn <= vcn)
1671 rl++;
1672 lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
1673 } else
1674 lcn = LCN_RL_NOT_MAPPED;
1675 if (unlikely(lcn < 0)) {
1676 /*
1677 * We extended the attribute allocation above.
1678 * If we hit an ENOENT here it means that the
1679 * allocation was insufficient which is a bug.
1680 */
1681 BUG_ON(lcn == LCN_ENOENT);
1682
1683 /* It is a hole, need to instantiate it. */
1684 if (lcn == LCN_HOLE) {
1685 // TODO: Instantiate the hole.
1686 // clear_buffer_new(bh);
1687 // unmap_underlying_metadata(bh->b_bdev,
1688 // bh->b_blocknr);
1689 // For non-uptodate buffers, need to
1690 // zero out the region outside the
1691 // request in this bh or all bhs,
1692 // depending on what we implemented
1693 // above.
1694 // Need to flush_dcache_page().
1695 // Or could use set_buffer_new()
1696 // instead?
1697 ntfs_error(vol->sb, "Writing into "
1698 "sparse regions is "
1699 "not supported yet. "
1700 "Sorry.");
1701 err = -EOPNOTSUPP;
Anton Altaparmakov9f993fe2005-06-25 16:15:36 +01001702 if (!rl)
1703 up_read(&ni->runlist.lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 goto err_out;
1705 } else if (!is_retry &&
1706 lcn == LCN_RL_NOT_MAPPED) {
1707 is_retry = TRUE;
1708 /*
1709 * Attempt to map runlist, dropping
1710 * lock for the duration.
1711 */
1712 up_read(&ni->runlist.lock);
1713 err = ntfs_map_runlist(ni, vcn);
1714 if (likely(!err))
1715 goto lock_retry_remap;
1716 rl = NULL;
1717 lcn = err;
Anton Altaparmakov9f993fe2005-06-25 16:15:36 +01001718 } else if (!rl)
1719 up_read(&ni->runlist.lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 /*
1721 * Failed to map the buffer, even after
1722 * retrying.
1723 */
1724 bh->b_blocknr = -1;
1725 ntfs_error(vol->sb, "Failed to write to inode "
1726 "0x%lx, attribute type 0x%x, "
1727 "vcn 0x%llx, offset 0x%x "
1728 "because its location on disk "
1729 "could not be determined%s "
1730 "(error code %lli).",
1731 ni->mft_no, ni->type,
1732 (unsigned long long)vcn,
1733 vcn_ofs, is_retry ? " even "
1734 "after retrying" : "",
1735 (long long)lcn);
1736 if (!err)
1737 err = -EIO;
1738 goto err_out;
1739 }
1740 /* We now have a successful remap, i.e. lcn >= 0. */
1741
1742 /* Setup buffer head to correct block. */
1743 bh->b_blocknr = ((lcn << vol->cluster_size_bits)
1744 + vcn_ofs) >> blocksize_bits;
1745 set_buffer_mapped(bh);
1746
1747 // FIXME: Something analogous to this is needed for
1748 // each newly allocated block, i.e. BH_New.
1749 // FIXME: Might need to take this out of the
1750 // if (!buffer_mapped(bh)) {}, depending on how we
1751 // implement things during the allocated_size and
1752 // initialized_size extension code above.
1753 if (buffer_new(bh)) {
1754 clear_buffer_new(bh);
1755 unmap_underlying_metadata(bh->b_bdev,
1756 bh->b_blocknr);
1757 if (PageUptodate(page)) {
1758 set_buffer_uptodate(bh);
1759 continue;
1760 }
1761 /*
1762 * Page is _not_ uptodate, zero surrounding
1763 * region. NOTE: This is how we decide if to
1764 * zero or not!
1765 */
1766 if (block_end > to || block_start < from) {
1767 void *kaddr;
1768
1769 kaddr = kmap_atomic(page, KM_USER0);
1770 if (block_end > to)
1771 memset(kaddr + to, 0,
1772 block_end - to);
1773 if (block_start < from)
1774 memset(kaddr + block_start, 0,
1775 from -
1776 block_start);
1777 flush_dcache_page(page);
1778 kunmap_atomic(kaddr, KM_USER0);
1779 }
1780 continue;
1781 }
1782 }
1783 /* @bh is mapped, set it uptodate if the page is uptodate. */
1784 if (PageUptodate(page)) {
1785 if (!buffer_uptodate(bh))
1786 set_buffer_uptodate(bh);
1787 continue;
1788 }
1789 /*
1790 * The page is not uptodate. The buffer is mapped. If it is not
1791 * uptodate, and it is only partially being written to, we need
1792 * to read the buffer in before the write, i.e. right now.
1793 */
1794 if (!buffer_uptodate(bh) &&
1795 (block_start < from || block_end > to)) {
1796 ll_rw_block(READ, 1, &bh);
1797 *wait_bh++ = bh;
1798 }
1799 } while (block++, block_start = block_end,
1800 (bh = bh->b_this_page) != head);
1801
1802 /* Release the lock if we took it. */
1803 if (rl) {
1804 up_read(&ni->runlist.lock);
1805 rl = NULL;
1806 }
1807
1808 /* If we issued read requests, let them complete. */
1809 while (wait_bh > wait) {
1810 wait_on_buffer(*--wait_bh);
1811 if (!buffer_uptodate(*wait_bh))
1812 return -EIO;
1813 }
1814
1815 ntfs_debug("Done.");
1816 return 0;
1817err_out:
1818 /*
1819 * Zero out any newly allocated blocks to avoid exposing stale data.
1820 * If BH_New is set, we know that the block was newly allocated in the
1821 * above loop.
1822 * FIXME: What about initialized_size increments? Have we done all the
1823 * required zeroing above? If not this error handling is broken, and
1824 * in particular the if (block_end <= from) check is completely bogus.
1825 */
1826 bh = head;
1827 block_start = 0;
1828 is_retry = FALSE;
1829 do {
1830 block_end = block_start + blocksize;
1831 if (block_end <= from)
1832 continue;
1833 if (block_start >= to)
1834 break;
1835 if (buffer_new(bh)) {
1836 void *kaddr;
1837
1838 clear_buffer_new(bh);
1839 kaddr = kmap_atomic(page, KM_USER0);
1840 memset(kaddr + block_start, 0, bh->b_size);
1841 kunmap_atomic(kaddr, KM_USER0);
1842 set_buffer_uptodate(bh);
1843 mark_buffer_dirty(bh);
1844 is_retry = TRUE;
1845 }
1846 } while (block_start = block_end, (bh = bh->b_this_page) != head);
1847 if (is_retry)
1848 flush_dcache_page(page);
1849 if (rl)
1850 up_read(&ni->runlist.lock);
1851 return err;
1852}
1853
1854/**
1855 * ntfs_prepare_write - prepare a page for receiving data
1856 *
1857 * This is called from generic_file_write() with i_sem held on the inode
1858 * (@page->mapping->host). The @page is locked but not kmap()ped. The source
1859 * data has not yet been copied into the @page.
1860 *
1861 * Need to extend the attribute/fill in holes if necessary, create blocks and
1862 * make partially overwritten blocks uptodate,
1863 *
1864 * i_size is not to be modified yet.
1865 *
1866 * Return 0 on success or -errno on error.
1867 *
1868 * Should be using block_prepare_write() [support for sparse files] or
1869 * cont_prepare_write() [no support for sparse files]. Cannot do that due to
1870 * ntfs specifics but can look at them for implementation guidance.
1871 *
1872 * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
1873 * the first byte in the page that will be written to and @to is the first byte
1874 * after the last byte that will be written to.
1875 */
1876static int ntfs_prepare_write(struct file *file, struct page *page,
1877 unsigned from, unsigned to)
1878{
1879 s64 new_size;
Anton Altaparmakovf40661b2005-01-13 16:03:38 +00001880 loff_t i_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881 struct inode *vi = page->mapping->host;
1882 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
1883 ntfs_volume *vol = ni->vol;
1884 ntfs_attr_search_ctx *ctx = NULL;
1885 MFT_RECORD *m = NULL;
1886 ATTR_RECORD *a;
1887 u8 *kaddr;
1888 u32 attr_len;
1889 int err;
1890
1891 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
1892 "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
1893 page->index, from, to);
1894 BUG_ON(!PageLocked(page));
1895 BUG_ON(from > PAGE_CACHE_SIZE);
1896 BUG_ON(to > PAGE_CACHE_SIZE);
1897 BUG_ON(from > to);
1898 BUG_ON(NInoMstProtected(ni));
1899 /*
1900 * If a previous ntfs_truncate() failed, repeat it and abort if it
1901 * fails again.
1902 */
1903 if (unlikely(NInoTruncateFailed(ni))) {
1904 down_write(&vi->i_alloc_sem);
1905 err = ntfs_truncate(vi);
1906 up_write(&vi->i_alloc_sem);
1907 if (err || NInoTruncateFailed(ni)) {
1908 if (!err)
1909 err = -EIO;
1910 goto err_out;
1911 }
1912 }
1913 /* If the attribute is not resident, deal with it elsewhere. */
1914 if (NInoNonResident(ni)) {
1915 /*
1916 * Only unnamed $DATA attributes can be compressed, encrypted,
1917 * and/or sparse.
1918 */
1919 if (ni->type == AT_DATA && !ni->name_len) {
1920 /* If file is encrypted, deny access, just like NT4. */
1921 if (NInoEncrypted(ni)) {
1922 ntfs_debug("Denying write access to encrypted "
1923 "file.");
1924 return -EACCES;
1925 }
1926 /* Compressed data streams are handled in compress.c. */
1927 if (NInoCompressed(ni)) {
1928 // TODO: Implement and replace this check with
1929 // return ntfs_write_compressed_block(page);
1930 ntfs_error(vi->i_sb, "Writing to compressed "
1931 "files is not supported yet. "
1932 "Sorry.");
1933 return -EOPNOTSUPP;
1934 }
1935 // TODO: Implement and remove this check.
1936 if (NInoSparse(ni)) {
1937 ntfs_error(vi->i_sb, "Writing to sparse files "
1938 "is not supported yet. Sorry.");
1939 return -EOPNOTSUPP;
1940 }
1941 }
1942 /* Normal data stream. */
1943 return ntfs_prepare_nonresident_write(page, from, to);
1944 }
1945 /*
1946 * Attribute is resident, implying it is not compressed, encrypted, or
1947 * sparse.
1948 */
1949 BUG_ON(page_has_buffers(page));
1950 new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
1951 /* If we do not need to resize the attribute allocation we are done. */
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001952 if (new_size <= i_size_read(vi))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 /* Map, pin, and lock the (base) mft record. */
1955 if (!NInoAttr(ni))
1956 base_ni = ni;
1957 else
1958 base_ni = ni->ext.base_ntfs_ino;
1959 m = map_mft_record(base_ni);
1960 if (IS_ERR(m)) {
1961 err = PTR_ERR(m);
1962 m = NULL;
1963 ctx = NULL;
1964 goto err_out;
1965 }
1966 ctx = ntfs_attr_get_search_ctx(base_ni, m);
1967 if (unlikely(!ctx)) {
1968 err = -ENOMEM;
1969 goto err_out;
1970 }
1971 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1972 CASE_SENSITIVE, 0, NULL, 0, ctx);
1973 if (unlikely(err)) {
1974 if (err == -ENOENT)
1975 err = -EIO;
1976 goto err_out;
1977 }
1978 m = ctx->mrec;
1979 a = ctx->attr;
1980 /* The total length of the attribute value. */
1981 attr_len = le32_to_cpu(a->data.resident.value_length);
Anton Altaparmakov946929d2005-01-13 15:26:29 +00001982 /* Fix an eventual previous failure of ntfs_commit_write(). */
Anton Altaparmakovf40661b2005-01-13 16:03:38 +00001983 i_size = i_size_read(vi);
1984 if (unlikely(attr_len > i_size)) {
1985 attr_len = i_size;
Anton Altaparmakov946929d2005-01-13 15:26:29 +00001986 a->data.resident.value_length = cpu_to_le32(attr_len);
Anton Altaparmakov946929d2005-01-13 15:26:29 +00001987 }
Anton Altaparmakov946929d2005-01-13 15:26:29 +00001988 /* If we do not need to resize the attribute allocation we are done. */
1989 if (new_size <= attr_len)
1990 goto done_unm;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991 /* Check if new size is allowed in $AttrDef. */
1992 err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
1993 if (unlikely(err)) {
1994 if (err == -ERANGE) {
1995 ntfs_error(vol->sb, "Write would cause the inode "
1996 "0x%lx to exceed the maximum size for "
1997 "its attribute type (0x%x). Aborting "
1998 "write.", vi->i_ino,
1999 le32_to_cpu(ni->type));
2000 } else {
2001 ntfs_error(vol->sb, "Inode 0x%lx has unknown "
2002 "attribute type 0x%x. Aborting "
2003 "write.", vi->i_ino,
2004 le32_to_cpu(ni->type));
2005 err = -EIO;
2006 }
2007 goto err_out2;
2008 }
2009 /*
2010 * Extend the attribute record to be able to store the new attribute
2011 * size.
2012 */
2013 if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a,
2014 le16_to_cpu(a->data.resident.value_offset) +
2015 new_size)) {
2016 /* Not enough space in the mft record. */
2017 ntfs_error(vol->sb, "Not enough space in the mft record for "
2018 "the resized attribute value. This is not "
2019 "supported yet. Aborting write.");
2020 err = -EOPNOTSUPP;
2021 goto err_out2;
2022 }
2023 /*
2024 * We have enough space in the mft record to fit the write. This
2025 * implies the attribute is smaller than the mft record and hence the
2026 * attribute must be in a single page and hence page->index must be 0.
2027 */
2028 BUG_ON(page->index);
2029 /*
2030 * If the beginning of the write is past the old size, enlarge the
2031 * attribute value up to the beginning of the write and fill it with
2032 * zeroes.
2033 */
2034 if (from > attr_len) {
2035 memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
2036 attr_len, 0, from - attr_len);
2037 a->data.resident.value_length = cpu_to_le32(from);
2038 /* Zero the corresponding area in the page as well. */
2039 if (PageUptodate(page)) {
2040 kaddr = kmap_atomic(page, KM_USER0);
2041 memset(kaddr + attr_len, 0, from - attr_len);
2042 kunmap_atomic(kaddr, KM_USER0);
2043 flush_dcache_page(page);
2044 }
2045 }
2046 flush_dcache_mft_record_page(ctx->ntfs_ino);
2047 mark_mft_record_dirty(ctx->ntfs_ino);
Anton Altaparmakov946929d2005-01-13 15:26:29 +00002048done_unm:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049 ntfs_attr_put_search_ctx(ctx);
2050 unmap_mft_record(base_ni);
2051 /*
2052 * Because resident attributes are handled by memcpy() to/from the
2053 * corresponding MFT record, and because this form of i/o is byte
2054 * aligned rather than block aligned, there is no need to bring the
2055 * page uptodate here as in the non-resident case where we need to
2056 * bring the buffers straddled by the write uptodate before
2057 * generic_file_write() does the copying from userspace.
2058 *
2059 * We thus defer the uptodate bringing of the page region outside the
2060 * region written to to ntfs_commit_write(), which makes the code
2061 * simpler and saves one atomic kmap which is good.
2062 */
2063done:
2064 ntfs_debug("Done.");
2065 return 0;
2066err_out:
2067 if (err == -ENOMEM)
2068 ntfs_warning(vi->i_sb, "Error allocating memory required to "
2069 "prepare the write.");
2070 else {
2071 ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
2072 "with error %i.", err);
2073 NVolSetErrors(vol);
2074 make_bad_inode(vi);
2075 }
2076err_out2:
2077 if (ctx)
2078 ntfs_attr_put_search_ctx(ctx);
2079 if (m)
2080 unmap_mft_record(base_ni);
2081 return err;
2082}
2083
2084/**
2085 * ntfs_commit_nonresident_write -
2086 *
2087 */
2088static int ntfs_commit_nonresident_write(struct page *page,
2089 unsigned from, unsigned to)
2090{
2091 s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
2092 struct inode *vi = page->mapping->host;
2093 struct buffer_head *bh, *head;
2094 unsigned int block_start, block_end, blocksize;
2095 BOOL partial;
2096
2097 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
2098 "0x%lx, from = %u, to = %u.", vi->i_ino,
2099 NTFS_I(vi)->type, page->index, from, to);
2100 blocksize = 1 << vi->i_blkbits;
2101
2102 // FIXME: We need a whole slew of special cases in here for compressed
2103 // files for example...
2104 // For now, we know ntfs_prepare_write() would have failed so we can't
2105 // get here in any of the cases which we have to special case, so we
2106 // are just a ripped off, unrolled generic_commit_write().
2107
2108 bh = head = page_buffers(page);
2109 block_start = 0;
2110 partial = FALSE;
2111 do {
2112 block_end = block_start + blocksize;
2113 if (block_end <= from || block_start >= to) {
2114 if (!buffer_uptodate(bh))
2115 partial = TRUE;
2116 } else {
2117 set_buffer_uptodate(bh);
2118 mark_buffer_dirty(bh);
2119 }
2120 } while (block_start = block_end, (bh = bh->b_this_page) != head);
2121 /*
2122 * If this is a partial write which happened to make all buffers
2123 * uptodate then we can optimize away a bogus ->readpage() for the next
2124 * read(). Here we 'discover' whether the page went uptodate as a
2125 * result of this (potentially partial) write.
2126 */
2127 if (!partial)
2128 SetPageUptodate(page);
2129 /*
2130 * Not convinced about this at all. See disparity comment above. For
2131 * now we know ntfs_prepare_write() would have failed in the write
2132 * exceeds i_size case, so this will never trigger which is fine.
2133 */
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00002134 if (pos > i_size_read(vi)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135 ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
2136 "not supported yet. Sorry.");
2137 return -EOPNOTSUPP;
2138 // vi->i_size = pos;
2139 // mark_inode_dirty(vi);
2140 }
2141 ntfs_debug("Done.");
2142 return 0;
2143}
2144
2145/**
2146 * ntfs_commit_write - commit the received data
2147 *
2148 * This is called from generic_file_write() with i_sem held on the inode
2149 * (@page->mapping->host). The @page is locked but not kmap()ped. The source
2150 * data has already been copied into the @page. ntfs_prepare_write() has been
2151 * called before the data copied and it returned success so we can take the
2152 * results of various BUG checks and some error handling for granted.
2153 *
2154 * Need to mark modified blocks dirty so they get written out later when
2155 * ntfs_writepage() is invoked by the VM.
2156 *
2157 * Return 0 on success or -errno on error.
2158 *
2159 * Should be using generic_commit_write(). This marks buffers uptodate and
2160 * dirty, sets the page uptodate if all buffers in the page are uptodate, and
2161 * updates i_size if the end of io is beyond i_size. In that case, it also
2162 * marks the inode dirty.
2163 *
2164 * Cannot use generic_commit_write() due to ntfs specialities but can look at
2165 * it for implementation guidance.
2166 *
2167 * If things have gone as outlined in ntfs_prepare_write(), then we do not
2168 * need to do any page content modifications here at all, except in the write
2169 * to resident attribute case, where we need to do the uptodate bringing here
2170 * which we combine with the copying into the mft record which means we save
2171 * one atomic kmap.
2172 */
2173static int ntfs_commit_write(struct file *file, struct page *page,
2174 unsigned from, unsigned to)
2175{
2176 struct inode *vi = page->mapping->host;
2177 ntfs_inode *base_ni, *ni = NTFS_I(vi);
2178 char *kaddr, *kattr;
2179 ntfs_attr_search_ctx *ctx;
2180 MFT_RECORD *m;
2181 ATTR_RECORD *a;
2182 u32 attr_len;
2183 int err;
2184
2185 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
2186 "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
2187 page->index, from, to);
2188 /* If the attribute is not resident, deal with it elsewhere. */
2189 if (NInoNonResident(ni)) {
2190 /* Only unnamed $DATA attributes can be compressed/encrypted. */
2191 if (ni->type == AT_DATA && !ni->name_len) {
2192 /* Encrypted files need separate handling. */
2193 if (NInoEncrypted(ni)) {
2194 // We never get here at present!
2195 BUG();
2196 }
2197 /* Compressed data streams are handled in compress.c. */
2198 if (NInoCompressed(ni)) {
2199 // TODO: Implement this!
2200 // return ntfs_write_compressed_block(page);
2201 // We never get here at present!
2202 BUG();
2203 }
2204 }
2205 /* Normal data stream. */
2206 return ntfs_commit_nonresident_write(page, from, to);
2207 }
2208 /*
2209 * Attribute is resident, implying it is not compressed, encrypted, or
2210 * sparse.
2211 */
2212 if (!NInoAttr(ni))
2213 base_ni = ni;
2214 else
2215 base_ni = ni->ext.base_ntfs_ino;
2216 /* Map, pin, and lock the mft record. */
2217 m = map_mft_record(base_ni);
2218 if (IS_ERR(m)) {
2219 err = PTR_ERR(m);
2220 m = NULL;
2221 ctx = NULL;
2222 goto err_out;
2223 }
2224 ctx = ntfs_attr_get_search_ctx(base_ni, m);
2225 if (unlikely(!ctx)) {
2226 err = -ENOMEM;
2227 goto err_out;
2228 }
2229 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
2230 CASE_SENSITIVE, 0, NULL, 0, ctx);
2231 if (unlikely(err)) {
2232 if (err == -ENOENT)
2233 err = -EIO;
2234 goto err_out;
2235 }
2236 a = ctx->attr;
2237 /* The total length of the attribute value. */
2238 attr_len = le32_to_cpu(a->data.resident.value_length);
2239 BUG_ON(from > attr_len);
2240 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
2241 kaddr = kmap_atomic(page, KM_USER0);
2242 /* Copy the received data from the page to the mft record. */
2243 memcpy(kattr + from, kaddr + from, to - from);
2244 /* Update the attribute length if necessary. */
2245 if (to > attr_len) {
2246 attr_len = to;
2247 a->data.resident.value_length = cpu_to_le32(attr_len);
2248 }
2249 /*
2250 * If the page is not uptodate, bring the out of bounds area(s)
2251 * uptodate by copying data from the mft record to the page.
2252 */
2253 if (!PageUptodate(page)) {
2254 if (from > 0)
2255 memcpy(kaddr, kattr, from);
2256 if (to < attr_len)
2257 memcpy(kaddr + to, kattr + to, attr_len - to);
2258 /* Zero the region outside the end of the attribute value. */
2259 if (attr_len < PAGE_CACHE_SIZE)
2260 memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
2261 /*
2262 * The probability of not having done any of the above is
2263 * extremely small, so we just flush unconditionally.
2264 */
2265 flush_dcache_page(page);
2266 SetPageUptodate(page);
2267 }
2268 kunmap_atomic(kaddr, KM_USER0);
2269 /* Update i_size if necessary. */
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00002270 if (i_size_read(vi) < attr_len) {
2271 unsigned long flags;
2272
2273 write_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 ni->allocated_size = ni->initialized_size = attr_len;
2275 i_size_write(vi, attr_len);
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00002276 write_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277 }
2278 /* Mark the mft record dirty, so it gets written back. */
2279 flush_dcache_mft_record_page(ctx->ntfs_ino);
2280 mark_mft_record_dirty(ctx->ntfs_ino);
2281 ntfs_attr_put_search_ctx(ctx);
2282 unmap_mft_record(base_ni);
2283 ntfs_debug("Done.");
2284 return 0;
2285err_out:
2286 if (err == -ENOMEM) {
2287 ntfs_warning(vi->i_sb, "Error allocating memory required to "
2288 "commit the write.");
2289 if (PageUptodate(page)) {
2290 ntfs_warning(vi->i_sb, "Page is uptodate, setting "
2291 "dirty so the write will be retried "
2292 "later on by the VM.");
2293 /*
2294 * Put the page on mapping->dirty_pages, but leave its
2295 * buffers' dirty state as-is.
2296 */
2297 __set_page_dirty_nobuffers(page);
2298 err = 0;
2299 } else
2300 ntfs_error(vi->i_sb, "Page is not uptodate. Written "
2301 "data has been lost.");
2302 } else {
2303 ntfs_error(vi->i_sb, "Resident attribute commit write failed "
2304 "with error %i.", err);
2305 NVolSetErrors(ni->vol);
2306 make_bad_inode(vi);
2307 }
2308 if (ctx)
2309 ntfs_attr_put_search_ctx(ctx);
2310 if (m)
2311 unmap_mft_record(base_ni);
2312 return err;
2313}
2314
2315#endif /* NTFS_RW */
2316
2317/**
2318 * ntfs_aops - general address space operations for inodes and attributes
2319 */
2320struct address_space_operations ntfs_aops = {
2321 .readpage = ntfs_readpage, /* Fill page with data. */
2322 .sync_page = block_sync_page, /* Currently, just unplugs the
2323 disk request queue. */
2324#ifdef NTFS_RW
2325 .writepage = ntfs_writepage, /* Write dirty page to disk. */
2326 .prepare_write = ntfs_prepare_write, /* Prepare page and buffers
2327 ready to receive data. */
2328 .commit_write = ntfs_commit_write, /* Commit received data. */
2329#endif /* NTFS_RW */
2330};
2331
2332/**
2333 * ntfs_mst_aops - general address space operations for mst protecteed inodes
2334 * and attributes
2335 */
2336struct address_space_operations ntfs_mst_aops = {
2337 .readpage = ntfs_readpage, /* Fill page with data. */
2338 .sync_page = block_sync_page, /* Currently, just unplugs the
2339 disk request queue. */
2340#ifdef NTFS_RW
2341 .writepage = ntfs_writepage, /* Write dirty page to disk. */
2342 .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
2343 without touching the buffers
2344 belonging to the page. */
2345#endif /* NTFS_RW */
2346};
2347
#ifdef NTFS_RW

/**
 * mark_ntfs_record_dirty - mark an ntfs record dirty
 * @page:	page containing the ntfs record to mark dirty
 * @ofs:	byte offset within @page at which the ntfs record begins
 *
 * Set the buffers and the page in which the ntfs record is located dirty.
 *
 * The latter also marks the vfs inode the ntfs record belongs to dirty
 * (I_DIRTY_PAGES only).
 *
 * If the page does not have buffers, we create them and set them uptodate.
 * The page may not be locked which is why we need to handle the buffers under
 * the mapping->private_lock.  Once the buffers are marked dirty we no longer
 * need the lock since try_to_free_buffers() does not free dirty buffers.
 */
void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs)
{
	struct address_space *mapping = page->mapping;
	ntfs_inode *ni = NTFS_I(mapping->host);
	struct buffer_head *bh, *head, *buffers_to_free = NULL;
	unsigned int end, bh_size, bh_ofs;

	BUG_ON(!PageUptodate(page));
	/* The record covers the bytes [@ofs, @end) of the page. */
	end = ofs + ni->itype.index.block_size;
	bh_size = 1 << VFS_I(ni)->i_blkbits;
	spin_lock(&mapping->private_lock);
	if (unlikely(!page_has_buffers(page))) {
		/* Drop the spinlock around the buffer allocation. */
		spin_unlock(&mapping->private_lock);
		bh = head = alloc_page_buffers(page, bh_size, 1);
		spin_lock(&mapping->private_lock);
		/* Recheck, buffers may have appeared while we were unlocked. */
		if (likely(!page_has_buffers(page))) {
			struct buffer_head *tail;

			do {
				set_buffer_uptodate(bh);
				tail = bh;
				bh = bh->b_this_page;
			} while (bh);
			/* Close the list into a ring before attaching it. */
			tail->b_this_page = head;
			attach_page_buffers(page, head);
		} else
			buffers_to_free = bh;
	}
	bh = head = page_buffers(page);
	do {
		bh_ofs = bh_offset(bh);
		/* Skip buffers that end before the record begins. */
		if (bh_ofs + bh_size <= ofs)
			continue;
		/* Stop at the first buffer beginning after the record. */
		if (unlikely(bh_ofs >= end))
			break;
		set_buffer_dirty(bh);
	} while ((bh = bh->b_this_page) != head);
	spin_unlock(&mapping->private_lock);
	__set_page_dirty_nobuffers(page);
	/* Free the buffers we allocated but did not attach to the page. */
	if (unlikely(buffers_to_free)) {
		do {
			bh = buffers_to_free->b_this_page;
			free_buffer_head(buffers_to_free);
			buffers_to_free = bh;
		} while (buffers_to_free);
	}
}

#endif /* NTFS_RW */