blob: 77d30707de56eee127cc55355b77b0f1e76805ff [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright (C) 2001-2003 Red Hat, Inc.
5 *
6 * Created by David Woodhouse <dwmw2@infradead.org>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000010 * $Id: gc.c,v 1.155 2005/11/07 11:14:39 gleixner Exp $
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 *
12 */
13
14#include <linux/kernel.h>
15#include <linux/mtd/mtd.h>
16#include <linux/slab.h>
17#include <linux/pagemap.h>
18#include <linux/crc32.h>
19#include <linux/compiler.h>
20#include <linux/stat.h>
21#include "nodelist.h"
22#include "compr.h"
23
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000024static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 struct jffs2_inode_cache *ic,
26 struct jffs2_raw_node_ref *raw);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000027static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -070028 struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000029static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -070030 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000031static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -070032 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35 uint32_t start, uint32_t end);
36static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38 uint32_t start, uint32_t end);
39static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
40 struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
41
42/* Called with erase_completion_lock held */
43static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44{
45 struct jffs2_eraseblock *ret;
46 struct list_head *nextlist = NULL;
47 int n = jiffies % 128;
48
49 /* Pick an eraseblock to garbage collect next. This is where we'll
50 put the clever wear-levelling algorithms. Eventually. */
51 /* We possibly want to favour the dirtier blocks more when the
52 number of free blocks is low. */
Artem B. Bityuckiya42163d2005-03-20 17:45:29 +000053again:
Linus Torvalds1da177e2005-04-16 15:20:36 -070054 if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56 nextlist = &c->bad_used_list;
57 } else if (n < 50 && !list_empty(&c->erasable_list)) {
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000058 /* Note that most of them will have gone directly to be erased.
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 So don't favour the erasable_list _too_ much. */
60 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61 nextlist = &c->erasable_list;
62 } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63 /* Most of the time, pick one off the very_dirty list */
64 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65 nextlist = &c->very_dirty_list;
66 } else if (n < 126 && !list_empty(&c->dirty_list)) {
67 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68 nextlist = &c->dirty_list;
69 } else if (!list_empty(&c->clean_list)) {
70 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71 nextlist = &c->clean_list;
72 } else if (!list_empty(&c->dirty_list)) {
73 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74
75 nextlist = &c->dirty_list;
76 } else if (!list_empty(&c->very_dirty_list)) {
77 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78 nextlist = &c->very_dirty_list;
79 } else if (!list_empty(&c->erasable_list)) {
80 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81
82 nextlist = &c->erasable_list;
Artem B. Bityuckiya42163d2005-03-20 17:45:29 +000083 } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84 /* There are blocks are wating for the wbuf sync */
85 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
Artem B. Bityuckiy3cceb9f2005-03-20 21:43:26 +000086 spin_unlock(&c->erase_completion_lock);
Artem B. Bityuckiya42163d2005-03-20 17:45:29 +000087 jffs2_flush_wbuf_pad(c);
Artem B. Bityuckiy3cceb9f2005-03-20 21:43:26 +000088 spin_lock(&c->erase_completion_lock);
Artem B. Bityuckiya42163d2005-03-20 17:45:29 +000089 goto again;
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 } else {
91 /* Eep. All were empty */
92 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93 return NULL;
94 }
95
96 ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97 list_del(&ret->list);
98 c->gcblock = ret;
99 ret->gc_node = ret->first_node;
100 if (!ret->gc_node) {
101 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102 BUG();
103 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000104
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105 /* Have we accidentally picked a clean block with wasted space ? */
106 if (ret->wasted_size) {
107 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
108 ret->dirty_size += ret->wasted_size;
109 c->wasted_size -= ret->wasted_size;
110 c->dirty_size += ret->wasted_size;
111 ret->wasted_size = 0;
112 }
113
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114 return ret;
115}
116
117/* jffs2_garbage_collect_pass
118 * Make a single attempt to progress GC. Move one node, and possibly
119 * start erasing one eraseblock.
120 */
121int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122{
123 struct jffs2_inode_info *f;
124 struct jffs2_inode_cache *ic;
125 struct jffs2_eraseblock *jeb;
126 struct jffs2_raw_node_ref *raw;
127 int ret = 0, inum, nlink;
128
129 if (down_interruptible(&c->alloc_sem))
130 return -EINTR;
131
132 for (;;) {
133 spin_lock(&c->erase_completion_lock);
134 if (!c->unchecked_size)
135 break;
136
137 /* We can't start doing GC yet. We haven't finished checking
138 the node CRCs etc. Do it now. */
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000139
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140 /* checked_ino is protected by the alloc_sem */
141 if (c->checked_ino > c->highest_ino) {
142 printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143 c->unchecked_size);
Artem B. Bityutskiye0c8e422005-07-24 16:14:17 +0100144 jffs2_dbg_dump_block_lists_nolock(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 spin_unlock(&c->erase_completion_lock);
146 BUG();
147 }
148
149 spin_unlock(&c->erase_completion_lock);
150
151 spin_lock(&c->inocache_lock);
152
153 ic = jffs2_get_ino_cache(c, c->checked_ino++);
154
155 if (!ic) {
156 spin_unlock(&c->inocache_lock);
157 continue;
158 }
159
160 if (!ic->nlink) {
161 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
162 ic->ino));
163 spin_unlock(&c->inocache_lock);
164 continue;
165 }
166 switch(ic->state) {
167 case INO_STATE_CHECKEDABSENT:
168 case INO_STATE_PRESENT:
169 D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
170 spin_unlock(&c->inocache_lock);
171 continue;
172
173 case INO_STATE_GC:
174 case INO_STATE_CHECKING:
175 printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
176 spin_unlock(&c->inocache_lock);
177 BUG();
178
179 case INO_STATE_READING:
180 /* We need to wait for it to finish, lest we move on
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000181 and trigger the BUG() above while we haven't yet
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 finished checking all its nodes */
183 D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
David Woodhoused96fb992006-04-17 00:19:48 +0100184 /* We need to come back again for the _same_ inode. We've
185 made no progress in this case, but that should be OK */
186 c->checked_ino--;
187
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188 up(&c->alloc_sem);
189 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
190 return 0;
191
192 default:
193 BUG();
194
195 case INO_STATE_UNCHECKED:
196 ;
197 }
198 ic->state = INO_STATE_CHECKING;
199 spin_unlock(&c->inocache_lock);
200
201 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
202
203 ret = jffs2_do_crccheck_inode(c, ic);
204 if (ret)
205 printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
206
207 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
208 up(&c->alloc_sem);
209 return ret;
210 }
211
212 /* First, work out which block we're garbage-collecting */
213 jeb = c->gcblock;
214
215 if (!jeb)
216 jeb = jffs2_find_gc_block(c);
217
218 if (!jeb) {
219 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
220 spin_unlock(&c->erase_completion_lock);
221 up(&c->alloc_sem);
222 return -EIO;
223 }
224
225 D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
226 D1(if (c->nextblock)
227 printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
228
229 if (!jeb->used_size) {
230 up(&c->alloc_sem);
231 goto eraseit;
232 }
233
234 raw = jeb->gc_node;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000235
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 while(ref_obsolete(raw)) {
237 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
238 raw = raw->next_phys;
239 if (unlikely(!raw)) {
240 printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000241 printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
243 jeb->gc_node = raw;
244 spin_unlock(&c->erase_completion_lock);
245 up(&c->alloc_sem);
246 BUG();
247 }
248 }
249 jeb->gc_node = raw;
250
251 D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
252
253 if (!raw->next_in_ino) {
254 /* Inode-less node. Clean marker, snapshot or something like that */
255 /* FIXME: If it's something that needs to be copied, including something
256 we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
257 spin_unlock(&c->erase_completion_lock);
258 jffs2_mark_node_obsolete(c, raw);
259 up(&c->alloc_sem);
260 goto eraseit_lock;
261 }
262
263 ic = jffs2_raw_ref_to_ic(raw);
264
265 /* We need to hold the inocache. Either the erase_completion_lock or
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000266 the inocache_lock are sufficient; we trade down since the inocache_lock
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 causes less contention. */
268 spin_lock(&c->inocache_lock);
269
270 spin_unlock(&c->erase_completion_lock);
271
272 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
273
274 /* Three possibilities:
275 1. Inode is already in-core. We must iget it and do proper
276 updating to its fragtree, etc.
277 2. Inode is not in-core, node is REF_PRISTINE. We lock the
278 inocache to prevent a read_inode(), copy the node intact.
279 3. Inode is not in-core, node is not pristine. We must iget()
280 and take the slow path.
281 */
282
283 switch(ic->state) {
284 case INO_STATE_CHECKEDABSENT:
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000285 /* It's been checked, but it's not currently in-core.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 We can just copy any pristine nodes, but have
287 to prevent anyone else from doing read_inode() while
288 we're at it, so we set the state accordingly */
289 if (ref_flags(raw) == REF_PRISTINE)
290 ic->state = INO_STATE_GC;
291 else {
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000292 D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293 ic->ino));
294 }
295 break;
296
297 case INO_STATE_PRESENT:
298 /* It's in-core. GC must iget() it. */
299 break;
300
301 case INO_STATE_UNCHECKED:
302 case INO_STATE_CHECKING:
303 case INO_STATE_GC:
304 /* Should never happen. We should have finished checking
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000305 by the time we actually start doing any GC, and since
306 we're holding the alloc_sem, no other garbage collection
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 can happen.
308 */
309 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
310 ic->ino, ic->state);
311 up(&c->alloc_sem);
312 spin_unlock(&c->inocache_lock);
313 BUG();
314
315 case INO_STATE_READING:
316 /* Someone's currently trying to read it. We must wait for
317 them to finish and then go through the full iget() route
318 to do the GC. However, sometimes read_inode() needs to get
319 the alloc_sem() (for marking nodes invalid) so we must
320 drop the alloc_sem before sleeping. */
321
322 up(&c->alloc_sem);
323 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
324 ic->ino, ic->state));
325 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000326 /* And because we dropped the alloc_sem we must start again from the
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 beginning. Ponder chance of livelock here -- we're returning success
328 without actually making any progress.
329
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000330 Q: What are the chances that the inode is back in INO_STATE_READING
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 again by the time we next enter this function? And that this happens
332 enough times to cause a real delay?
333
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000334 A: Small enough that I don't care :)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 */
336 return 0;
337 }
338
339 /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000340 node intact, and we don't have to muck about with the fragtree etc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 because we know it's not in-core. If it _was_ in-core, we go through
342 all the iget() crap anyway */
343
344 if (ic->state == INO_STATE_GC) {
345 spin_unlock(&c->inocache_lock);
346
347 ret = jffs2_garbage_collect_pristine(c, ic, raw);
348
349 spin_lock(&c->inocache_lock);
350 ic->state = INO_STATE_CHECKEDABSENT;
351 wake_up(&c->inocache_wq);
352
353 if (ret != -EBADFD) {
354 spin_unlock(&c->inocache_lock);
355 goto release_sem;
356 }
357
358 /* Fall through if it wanted us to, with inocache_lock held */
359 }
360
361 /* Prevent the fairly unlikely race where the gcblock is
362 entirely obsoleted by the final close of a file which had
363 the only valid nodes in the block, followed by erasure,
364 followed by freeing of the ic because the erased block(s)
365 held _all_ the nodes of that inode.... never been seen but
366 it's vaguely possible. */
367
368 inum = ic->ino;
369 nlink = ic->nlink;
370 spin_unlock(&c->inocache_lock);
371
372 f = jffs2_gc_fetch_inode(c, inum, nlink);
373 if (IS_ERR(f)) {
374 ret = PTR_ERR(f);
375 goto release_sem;
376 }
377 if (!f) {
378 ret = 0;
379 goto release_sem;
380 }
381
382 ret = jffs2_garbage_collect_live(c, jeb, raw, f);
383
384 jffs2_gc_release_inode(c, f);
385
386 release_sem:
387 up(&c->alloc_sem);
388
389 eraseit_lock:
390 /* If we've finished this block, start it erasing */
391 spin_lock(&c->erase_completion_lock);
392
393 eraseit:
394 if (c->gcblock && !c->gcblock->used_size) {
395 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
396 /* We're GC'ing an empty block? */
397 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
398 c->gcblock = NULL;
399 c->nr_erasing_blocks++;
400 jffs2_erase_pending_trigger(c);
401 }
402 spin_unlock(&c->erase_completion_lock);
403
404 return ret;
405}
406
407static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
408 struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
409{
410 struct jffs2_node_frag *frag;
411 struct jffs2_full_dnode *fn = NULL;
412 struct jffs2_full_dirent *fd;
413 uint32_t start = 0, end = 0, nrfrags = 0;
414 int ret = 0;
415
416 down(&f->sem);
417
418 /* Now we have the lock for this inode. Check that it's still the one at the head
419 of the list. */
420
421 spin_lock(&c->erase_completion_lock);
422
423 if (c->gcblock != jeb) {
424 spin_unlock(&c->erase_completion_lock);
425 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
426 goto upnout;
427 }
428 if (ref_obsolete(raw)) {
429 spin_unlock(&c->erase_completion_lock);
430 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
431 /* They'll call again */
432 goto upnout;
433 }
434 spin_unlock(&c->erase_completion_lock);
435
436 /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
437 if (f->metadata && f->metadata->raw == raw) {
438 fn = f->metadata;
439 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
440 goto upnout;
441 }
442
443 /* FIXME. Read node and do lookup? */
444 for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
445 if (frag->node && frag->node->raw == raw) {
446 fn = frag->node;
447 end = frag->ofs + frag->size;
448 if (!nrfrags++)
449 start = frag->ofs;
450 if (nrfrags == frag->node->frags)
451 break; /* We've found them all */
452 }
453 }
454 if (fn) {
455 if (ref_flags(raw) == REF_PRISTINE) {
456 ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
457 if (!ret) {
458 /* Urgh. Return it sensibly. */
459 frag->node->raw = f->inocache->nodes;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000460 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 if (ret != -EBADFD)
462 goto upnout;
463 }
464 /* We found a datanode. Do the GC */
465 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
466 /* It crosses a page boundary. Therefore, it must be a hole. */
467 ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
468 } else {
469 /* It could still be a hole. But we GC the page this way anyway */
470 ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
471 }
472 goto upnout;
473 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000474
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 /* Wasn't a dnode. Try dirent */
476 for (fd = f->dents; fd; fd=fd->next) {
477 if (fd->raw == raw)
478 break;
479 }
480
481 if (fd && fd->ino) {
482 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
483 } else if (fd) {
484 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
485 } else {
486 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
487 ref_offset(raw), f->inocache->ino);
488 if (ref_obsolete(raw)) {
489 printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
490 } else {
Artem B. Bityutskiye0c8e422005-07-24 16:14:17 +0100491 jffs2_dbg_dump_node(c, ref_offset(raw));
492 BUG();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 }
494 }
495 upnout:
496 up(&f->sem);
497
498 return ret;
499}
500
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000501static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 struct jffs2_inode_cache *ic,
503 struct jffs2_raw_node_ref *raw)
504{
505 union jffs2_node_union *node;
506 struct jffs2_raw_node_ref *nraw;
507 size_t retlen;
508 int ret;
509 uint32_t phys_ofs, alloclen;
510 uint32_t crc, rawlen;
511 int retried = 0;
512
513 D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
514
515 rawlen = ref_totlen(c, c->gcblock, raw);
516
517 /* Ask for a small amount of space (or the totlen if smaller) because we
518 don't want to force wastage of the end of a block if splitting would
519 work. */
Ferenc Havasie631ddb2005-09-07 09:35:26 +0100520 ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) +
521 JFFS2_MIN_DATA_LEN, rawlen), &phys_ofs, &alloclen, rawlen);
522 /* this is not the exact summary size of it,
523 it is only an upper estimation */
524
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525 if (ret)
526 return ret;
527
528 if (alloclen < rawlen) {
529 /* Doesn't fit untouched. We'll go the old route and split it */
530 return -EBADFD;
531 }
532
533 node = kmalloc(rawlen, GFP_KERNEL);
534 if (!node)
535 return -ENOMEM;
536
537 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
538 if (!ret && retlen != rawlen)
539 ret = -EIO;
540 if (ret)
541 goto out_node;
542
543 crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
544 if (je32_to_cpu(node->u.hdr_crc) != crc) {
545 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
546 ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
547 goto bail;
548 }
549
550 switch(je16_to_cpu(node->u.nodetype)) {
551 case JFFS2_NODETYPE_INODE:
552 crc = crc32(0, node, sizeof(node->i)-8);
553 if (je32_to_cpu(node->i.node_crc) != crc) {
554 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
555 ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
556 goto bail;
557 }
558
559 if (je32_to_cpu(node->i.dsize)) {
560 crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
561 if (je32_to_cpu(node->i.data_crc) != crc) {
562 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
563 ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
564 goto bail;
565 }
566 }
567 break;
568
569 case JFFS2_NODETYPE_DIRENT:
570 crc = crc32(0, node, sizeof(node->d)-8);
571 if (je32_to_cpu(node->d.node_crc) != crc) {
572 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
573 ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
574 goto bail;
575 }
576
577 if (node->d.nsize) {
578 crc = crc32(0, node->d.name, node->d.nsize);
579 if (je32_to_cpu(node->d.name_crc) != crc) {
580 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
581 ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
582 goto bail;
583 }
584 }
585 break;
586 default:
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000587 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 ref_offset(raw), je16_to_cpu(node->u.nodetype));
589 goto bail;
590 }
591
592 nraw = jffs2_alloc_raw_node_ref();
593 if (!nraw) {
594 ret = -ENOMEM;
595 goto out_node;
596 }
597
598 /* OK, all the CRCs are good; this node can just be copied as-is. */
599 retry:
600 nraw->flash_offset = phys_ofs;
601 nraw->__totlen = rawlen;
602 nraw->next_phys = NULL;
603
604 ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
605
606 if (ret || (retlen != rawlen)) {
607 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
608 rawlen, phys_ofs, ret, retlen);
609 if (retlen) {
610 /* Doesn't belong to any inode */
611 nraw->next_in_ino = NULL;
612
613 nraw->flash_offset |= REF_OBSOLETE;
614 jffs2_add_physical_node_ref(c, nraw);
615 jffs2_mark_node_obsolete(c, nraw);
616 } else {
617 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
618 jffs2_free_raw_node_ref(nraw);
619 }
620 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
621 /* Try to reallocate space and retry */
622 uint32_t dummy;
623 struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
624
625 retried = 1;
626
627 D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000628
Artem B. Bityutskiy730554d2005-07-17 07:56:26 +0100629 jffs2_dbg_acct_sanity_check(c,jeb);
630 jffs2_dbg_acct_paranoia_check(c, jeb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631
Ferenc Havasie631ddb2005-09-07 09:35:26 +0100632 ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy, rawlen);
633 /* this is not the exact summary size of it,
634 it is only an upper estimation */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635
636 if (!ret) {
637 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
638
Artem B. Bityutskiy730554d2005-07-17 07:56:26 +0100639 jffs2_dbg_acct_sanity_check(c,jeb);
640 jffs2_dbg_acct_paranoia_check(c, jeb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641
642 goto retry;
643 }
644 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
645 jffs2_free_raw_node_ref(nraw);
646 }
647
648 jffs2_free_raw_node_ref(nraw);
649 if (!ret)
650 ret = -EIO;
651 goto out_node;
652 }
653 nraw->flash_offset |= REF_PRISTINE;
654 jffs2_add_physical_node_ref(c, nraw);
655
656 /* Link into per-inode list. This is safe because of the ic
657 state being INO_STATE_GC. Note that if we're doing this
658 for an inode which is in-core, the 'nraw' pointer is then
659 going to be fetched from ic->nodes by our caller. */
660 spin_lock(&c->erase_completion_lock);
661 nraw->next_in_ino = ic->nodes;
662 ic->nodes = nraw;
663 spin_unlock(&c->erase_completion_lock);
664
665 jffs2_mark_node_obsolete(c, raw);
666 D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
667
668 out_node:
669 kfree(node);
670 return ret;
671 bail:
672 ret = -EBADFD;
673 goto out_node;
674}
675
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000676static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
678{
679 struct jffs2_full_dnode *new_fn;
680 struct jffs2_raw_inode ri;
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100681 struct jffs2_node_frag *last_frag;
David Woodhouseaef9ab42006-05-19 00:28:49 +0100682 union jffs2_device_node dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 char *mdata = NULL, mdatalen = 0;
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100684 uint32_t alloclen, phys_ofs, ilen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 int ret;
686
687 if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
688 S_ISCHR(JFFS2_F_I_MODE(f)) ) {
689 /* For these, we don't actually need to read the old node */
David Woodhouseaef9ab42006-05-19 00:28:49 +0100690 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 mdata = (char *)&dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
693 } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
694 mdatalen = fn->size;
695 mdata = kmalloc(fn->size, GFP_KERNEL);
696 if (!mdata) {
697 printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
698 return -ENOMEM;
699 }
700 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
701 if (ret) {
702 printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
703 kfree(mdata);
704 return ret;
705 }
706 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
707
708 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000709
Ferenc Havasie631ddb2005-09-07 09:35:26 +0100710 ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen,
711 JFFS2_SUMMARY_INODE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712 if (ret) {
713 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
714 sizeof(ri)+ mdatalen, ret);
715 goto out;
716 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000717
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100718 last_frag = frag_last(&f->fragtree);
719 if (last_frag)
720 /* Fetch the inode length from the fragtree rather then
721 * from i_size since i_size may have not been updated yet */
722 ilen = last_frag->ofs + last_frag->size;
723 else
724 ilen = JFFS2_F_I_SIZE(f);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000725
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 memset(&ri, 0, sizeof(ri));
727 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
728 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
729 ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
730 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
731
732 ri.ino = cpu_to_je32(f->inocache->ino);
733 ri.version = cpu_to_je32(++f->highest_version);
734 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
735 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
736 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100737 ri.isize = cpu_to_je32(ilen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
739 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
740 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
741 ri.offset = cpu_to_je32(0);
742 ri.csize = cpu_to_je32(mdatalen);
743 ri.dsize = cpu_to_je32(mdatalen);
744 ri.compr = JFFS2_COMPR_NONE;
745 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
746 ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
747
748 new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
749
750 if (IS_ERR(new_fn)) {
751 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
752 ret = PTR_ERR(new_fn);
753 goto out;
754 }
755 jffs2_mark_node_obsolete(c, fn->raw);
756 jffs2_free_full_dnode(fn);
757 f->metadata = new_fn;
758 out:
759 if (S_ISLNK(JFFS2_F_I_MODE(f)))
760 kfree(mdata);
761 return ret;
762}
763
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000764static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
766{
767 struct jffs2_full_dirent *new_fd;
768 struct jffs2_raw_dirent rd;
769 uint32_t alloclen, phys_ofs;
770 int ret;
771
772 rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
773 rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
774 rd.nsize = strlen(fd->name);
775 rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
776 rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
777
778 rd.pino = cpu_to_je32(f->inocache->ino);
779 rd.version = cpu_to_je32(++f->highest_version);
780 rd.ino = cpu_to_je32(fd->ino);
Artem B. Bityutskiy3a69e0c2005-08-17 14:46:26 +0100781 /* If the times on this inode were set by explicit utime() they can be different,
782 so refrain from splatting them. */
783 if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
784 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000785 else
Artem B. Bityutskiy3a69e0c2005-08-17 14:46:26 +0100786 rd.mctime = cpu_to_je32(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 rd.type = fd->type;
788 rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
789 rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000790
Ferenc Havasie631ddb2005-09-07 09:35:26 +0100791 ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen,
792 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 if (ret) {
794 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
795 sizeof(rd)+rd.nsize, ret);
796 return ret;
797 }
798 new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
799
800 if (IS_ERR(new_fd)) {
801 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
802 return PTR_ERR(new_fd);
803 }
804 jffs2_add_fd_to_list(c, new_fd, &f->dents);
805 return 0;
806}
807
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000808static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
810{
811 struct jffs2_full_dirent **fdp = &f->dents;
812 int found = 0;
813
814 /* On a medium where we can't actually mark nodes obsolete
815 pernamently, such as NAND flash, we need to work out
816 whether this deletion dirent is still needed to actively
817 delete a 'real' dirent with the same name that's still
818 somewhere else on the flash. */
819 if (!jffs2_can_mark_obsolete(c)) {
820 struct jffs2_raw_dirent *rd;
821 struct jffs2_raw_node_ref *raw;
822 int ret;
823 size_t retlen;
824 int name_len = strlen(fd->name);
825 uint32_t name_crc = crc32(0, fd->name, name_len);
826 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
827
828 rd = kmalloc(rawlen, GFP_KERNEL);
829 if (!rd)
830 return -ENOMEM;
831
832 /* Prevent the erase code from nicking the obsolete node refs while
833 we're looking at them. I really don't like this extra lock but
834 can't see any alternative. Suggestions on a postcard to... */
835 down(&c->erase_free_sem);
836
837 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
838
839 /* We only care about obsolete ones */
840 if (!(ref_obsolete(raw)))
841 continue;
842
843 /* Any dirent with the same name is going to have the same length... */
844 if (ref_totlen(c, NULL, raw) != rawlen)
845 continue;
846
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000847 /* Doesn't matter if there's one in the same erase block. We're going to
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 delete it too at the same time. */
Andrew Victor3be36672005-02-09 09:09:05 +0000849 if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 continue;
851
852 D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
853
854 /* This is an obsolete node belonging to the same directory, and it's of the right
855 length. We need to take a closer look...*/
856 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
857 if (ret) {
858 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
859 /* If we can't read it, we don't need to continue to obsolete it. Continue */
860 continue;
861 }
862 if (retlen != rawlen) {
863 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
864 retlen, rawlen, ref_offset(raw));
865 continue;
866 }
867
868 if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
869 continue;
870
871 /* If the name CRC doesn't match, skip */
872 if (je32_to_cpu(rd->name_crc) != name_crc)
873 continue;
874
875 /* If the name length doesn't match, or it's another deletion dirent, skip */
876 if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
877 continue;
878
879 /* OK, check the actual name now */
880 if (memcmp(rd->name, fd->name, name_len))
881 continue;
882
883 /* OK. The name really does match. There really is still an older node on
884 the flash which our deletion dirent obsoletes. So we have to write out
885 a new deletion dirent to replace it */
886 up(&c->erase_free_sem);
887
888 D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
889 ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
890 kfree(rd);
891
892 return jffs2_garbage_collect_dirent(c, jeb, f, fd);
893 }
894
895 up(&c->erase_free_sem);
896 kfree(rd);
897 }
898
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000899 /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
Artem B. Bityutskiy3a69e0c2005-08-17 14:46:26 +0100900 we should update the metadata node with those times accordingly */
901
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 /* No need for it any more. Just mark it obsolete and remove it from the list */
903 while (*fdp) {
904 if ((*fdp) == fd) {
905 found = 1;
906 *fdp = fd->next;
907 break;
908 }
909 fdp = &(*fdp)->next;
910 }
911 if (!found) {
912 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
913 }
914 jffs2_mark_node_obsolete(c, fd->raw);
915 jffs2_free_full_dirent(fd);
916 return 0;
917}
918
919static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
920 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
921 uint32_t start, uint32_t end)
922{
923 struct jffs2_raw_inode ri;
924 struct jffs2_node_frag *frag;
925 struct jffs2_full_dnode *new_fn;
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100926 uint32_t alloclen, phys_ofs, ilen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 int ret;
928
929 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
930 f->inocache->ino, start, end));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000931
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 memset(&ri, 0, sizeof(ri));
933
934 if(fn->frags > 1) {
935 size_t readlen;
936 uint32_t crc;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000937 /* It's partially obsoleted by a later write. So we have to
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938 write it out again with the _same_ version as before */
939 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
940 if (readlen != sizeof(ri) || ret) {
941 printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
942 goto fill;
943 }
944 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
945 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
946 ref_offset(fn->raw),
947 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
948 return -EIO;
949 }
950 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
951 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
952 ref_offset(fn->raw),
953 je32_to_cpu(ri.totlen), sizeof(ri));
954 return -EIO;
955 }
956 crc = crc32(0, &ri, sizeof(ri)-8);
957 if (crc != je32_to_cpu(ri.node_crc)) {
958 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000959 ref_offset(fn->raw),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 je32_to_cpu(ri.node_crc), crc);
961 /* FIXME: We could possibly deal with this by writing new holes for each frag */
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000962 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 start, end, f->inocache->ino);
964 goto fill;
965 }
966 if (ri.compr != JFFS2_COMPR_ZERO) {
967 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000968 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 start, end, f->inocache->ino);
970 goto fill;
971 }
972 } else {
973 fill:
974 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
975 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
976 ri.totlen = cpu_to_je32(sizeof(ri));
977 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
978
979 ri.ino = cpu_to_je32(f->inocache->ino);
980 ri.version = cpu_to_je32(++f->highest_version);
981 ri.offset = cpu_to_je32(start);
982 ri.dsize = cpu_to_je32(end - start);
983 ri.csize = cpu_to_je32(0);
984 ri.compr = JFFS2_COMPR_ZERO;
985 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000986
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100987 frag = frag_last(&f->fragtree);
988 if (frag)
989 /* Fetch the inode length from the fragtree rather then
990 * from i_size since i_size may have not been updated yet */
991 ilen = frag->ofs + frag->size;
992 else
993 ilen = JFFS2_F_I_SIZE(f);
994
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
996 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
997 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100998 ri.isize = cpu_to_je32(ilen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1000 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1001 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1002 ri.data_crc = cpu_to_je32(0);
1003 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1004
Ferenc Havasie631ddb2005-09-07 09:35:26 +01001005 ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen,
1006 JFFS2_SUMMARY_INODE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007 if (ret) {
1008 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1009 sizeof(ri), ret);
1010 return ret;
1011 }
1012 new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
1013
1014 if (IS_ERR(new_fn)) {
1015 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1016 return PTR_ERR(new_fn);
1017 }
1018 if (je32_to_cpu(ri.version) == f->highest_version) {
1019 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1020 if (f->metadata) {
1021 jffs2_mark_node_obsolete(c, f->metadata->raw);
1022 jffs2_free_full_dnode(f->metadata);
1023 f->metadata = NULL;
1024 }
1025 return 0;
1026 }
1027
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001028 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 * We should only get here in the case where the node we are
1030 * replacing had more than one frag, so we kept the same version
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001031 * number as before. (Except in case of error -- see 'goto fill;'
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 * above.)
1033 */
1034 D1(if(unlikely(fn->frags <= 1)) {
1035 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1036 fn->frags, je32_to_cpu(ri.version), f->highest_version,
1037 je32_to_cpu(ri.ino));
1038 });
1039
1040 /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1041 mark_ref_normal(new_fn->raw);
1042
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001043 for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 frag; frag = frag_next(frag)) {
1045 if (frag->ofs > fn->size + fn->ofs)
1046 break;
1047 if (frag->node == fn) {
1048 frag->node = new_fn;
1049 new_fn->frags++;
1050 fn->frags--;
1051 }
1052 }
1053 if (fn->frags) {
1054 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1055 BUG();
1056 }
1057 if (!new_fn->frags) {
1058 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1059 BUG();
1060 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001061
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 jffs2_mark_node_obsolete(c, fn->raw);
1063 jffs2_free_full_dnode(fn);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001064
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 return 0;
1066}
1067
1068static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1069 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1070 uint32_t start, uint32_t end)
1071{
1072 struct jffs2_full_dnode *new_fn;
1073 struct jffs2_raw_inode ri;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001074 uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 int ret = 0;
1076 unsigned char *comprbuf = NULL, *writebuf;
1077 unsigned long pg;
1078 unsigned char *pg_ptr;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001079
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080 memset(&ri, 0, sizeof(ri));
1081
1082 D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1083 f->inocache->ino, start, end));
1084
1085 orig_end = end;
1086 orig_start = start;
1087
1088 if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1089 /* Attempt to do some merging. But only expand to cover logically
1090 adjacent frags if the block containing them is already considered
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001091 to be dirty. Otherwise we end up with GC just going round in
1092 circles dirtying the nodes it already wrote out, especially
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 on NAND where we have small eraseblocks and hence a much higher
1094 chance of nodes having to be split to cross boundaries. */
1095
1096 struct jffs2_node_frag *frag;
1097 uint32_t min, max;
1098
1099 min = start & ~(PAGE_CACHE_SIZE-1);
1100 max = min + PAGE_CACHE_SIZE;
1101
1102 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1103
1104 /* BUG_ON(!frag) but that'll happen anyway... */
1105
1106 BUG_ON(frag->ofs != start);
1107
1108 /* First grow down... */
1109 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1110
1111 /* If the previous frag doesn't even reach the beginning, there's
1112 excessive fragmentation. Just merge. */
1113 if (frag->ofs > min) {
1114 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1115 frag->ofs, frag->ofs+frag->size));
1116 start = frag->ofs;
1117 continue;
1118 }
1119 /* OK. This frag holds the first byte of the page. */
1120 if (!frag->node || !frag->node->raw) {
1121 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1122 frag->ofs, frag->ofs+frag->size));
1123 break;
1124 } else {
1125
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001126 /* OK, it's a frag which extends to the beginning of the page. Does it live
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 in a block which is still considered clean? If so, don't obsolete it.
1128 If not, cover it anyway. */
1129
1130 struct jffs2_raw_node_ref *raw = frag->node->raw;
1131 struct jffs2_eraseblock *jeb;
1132
1133 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1134
1135 if (jeb == c->gcblock) {
1136 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1137 frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1138 start = frag->ofs;
1139 break;
1140 }
1141 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1142 D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1143 frag->ofs, frag->ofs+frag->size, jeb->offset));
1144 break;
1145 }
1146
1147 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1148 frag->ofs, frag->ofs+frag->size, jeb->offset));
1149 start = frag->ofs;
1150 break;
1151 }
1152 }
1153
1154 /* ... then up */
1155
1156 /* Find last frag which is actually part of the node we're to GC. */
1157 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1158
1159 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1160
1161 /* If the previous frag doesn't even reach the beginning, there's lots
1162 of fragmentation. Just merge. */
1163 if (frag->ofs+frag->size < max) {
1164 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1165 frag->ofs, frag->ofs+frag->size));
1166 end = frag->ofs + frag->size;
1167 continue;
1168 }
1169
1170 if (!frag->node || !frag->node->raw) {
1171 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1172 frag->ofs, frag->ofs+frag->size));
1173 break;
1174 } else {
1175
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001176 /* OK, it's a frag which extends to the beginning of the page. Does it live
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 in a block which is still considered clean? If so, don't obsolete it.
1178 If not, cover it anyway. */
1179
1180 struct jffs2_raw_node_ref *raw = frag->node->raw;
1181 struct jffs2_eraseblock *jeb;
1182
1183 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1184
1185 if (jeb == c->gcblock) {
1186 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1187 frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1188 end = frag->ofs + frag->size;
1189 break;
1190 }
1191 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1192 D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1193 frag->ofs, frag->ofs+frag->size, jeb->offset));
1194 break;
1195 }
1196
1197 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1198 frag->ofs, frag->ofs+frag->size, jeb->offset));
1199 end = frag->ofs + frag->size;
1200 break;
1201 }
1202 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001203 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 orig_start, orig_end, start, end));
1205
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +01001206 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 BUG_ON(end < orig_end);
1208 BUG_ON(start > orig_start);
1209 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001210
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 /* First, use readpage() to read the appropriate page into the page cache */
1212 /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1213 * triggered garbage collection in the first place?
1214 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1215 * page OK. We'll actually write it out again in commit_write, which is a little
1216 * suboptimal, but at least we're correct.
1217 */
1218 pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1219
1220 if (IS_ERR(pg_ptr)) {
1221 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1222 return PTR_ERR(pg_ptr);
1223 }
1224
1225 offset = start;
1226 while(offset < orig_end) {
1227 uint32_t datalen;
1228 uint32_t cdatalen;
1229 uint16_t comprtype = JFFS2_COMPR_NONE;
1230
Ferenc Havasie631ddb2005-09-07 09:35:26 +01001231 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs,
1232 &alloclen, JFFS2_SUMMARY_INODE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233
1234 if (ret) {
1235 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1236 sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1237 break;
1238 }
1239 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1240 datalen = end - offset;
1241
1242 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1243
1244 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1245
1246 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1247 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1248 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1249 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1250
1251 ri.ino = cpu_to_je32(f->inocache->ino);
1252 ri.version = cpu_to_je32(++f->highest_version);
1253 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1254 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1255 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1256 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1257 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1258 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1259 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1260 ri.offset = cpu_to_je32(offset);
1261 ri.csize = cpu_to_je32(cdatalen);
1262 ri.dsize = cpu_to_je32(datalen);
1263 ri.compr = comprtype & 0xff;
1264 ri.usercompr = (comprtype >> 8) & 0xff;
1265 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1266 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001267
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1269
1270 jffs2_free_comprbuf(comprbuf, writebuf);
1271
1272 if (IS_ERR(new_fn)) {
1273 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1274 ret = PTR_ERR(new_fn);
1275 break;
1276 }
1277 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1278 offset += datalen;
1279 if (f->metadata) {
1280 jffs2_mark_node_obsolete(c, f->metadata->raw);
1281 jffs2_free_full_dnode(f->metadata);
1282 f->metadata = NULL;
1283 }
1284 }
1285
1286 jffs2_gc_release_page(c, pg_ptr, &pg);
1287 return ret;
1288}