/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 */
#ifndef	_H_JFS_LOGMGR
#define _H_JFS_LOGMGR

#include <linux/uuid.h>

#include "jfs_filsys.h"
#include "jfs_lock.h"

/*
 *	log manager configuration parameters
 */

/*
 * log page size: LOGPSIZE is the size of one log page in bytes and
 * L2LOGPSIZE is its base-2 logarithm (1 << L2LOGPSIZE == LOGPSIZE);
 * the two must be changed together.
 */
#define	LOGPSIZE	4096
#define	L2LOGPSIZE	12

#define LOGPAGES	16	/* Log pages per mounted file system */

/*
 *	log logical volume
 *
 * a log is used to make the commit operation on journalled
 * files within the same logical volume group atomic.
 * a log is implemented with a logical volume.
 * there is one log per logical volume group.
 *
 * block 0 of the log logical volume is not used (ipl etc).
 * block 1 contains a log "superblock" and is used by logFormat(),
 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
 * of the log but is not otherwise used during normal processing.
 * blocks 2 - (N-1) are used to contain log records.
 *
 * when a volume group is varied-on-line, logRedo() must have
 * been executed before the file systems (logical volumes) in
 * the volume group can be mounted.
 */
/*
 *	log superblock (block 1 of logical volume)
 */
#define	LOGSUPER_B	1	/* block number of the log superblock */
#define	LOGSTART_B	2	/* first block that holds log records */

#define	LOGMAGIC	0x87654321
#define	LOGVERSION	1

#define MAX_ACTIVE	128	/* Max active file systems sharing log */

53struct logsuper {
54 __le32 magic; /* 4: log lv identifier */
55 __le32 version; /* 4: version number */
56 __le32 serial; /* 4: log open/mount counter */
57 __le32 size; /* 4: size in number of LOGPSIZE blocks */
58 __le32 bsize; /* 4: logical block size in byte */
59 __le32 l2bsize; /* 4: log2 of bsize */
60
61 __le32 flag; /* 4: option */
62 __le32 state; /* 4: state - see below */
63
64 __le32 end; /* 4: addr of last log record set by logredo */
Andy Shevchenko2e3bc612019-01-10 15:41:53 +020065 uuid_t uuid; /* 16: 128-bit journal uuid */
Linus Torvalds1da177e2005-04-16 15:20:36 -070066 char label[16]; /* 16: journal label */
67 struct {
Andy Shevchenko2e3bc612019-01-10 15:41:53 +020068 uuid_t uuid;
Linus Torvalds1da177e2005-04-16 15:20:36 -070069 } active[MAX_ACTIVE]; /* 2048: active file systems list */
70};
71
/* log flag: commit option (see jfs_filsys.h) */

/* log state (values of the logsuper state field) */
#define	LOGMOUNT	0	/* log mounted by lmLogInit() */
#define LOGREDONE	1	/* log shutdown by lmLogShutdown().
				 * log redo completed by logredo().
				 */
#define LOGWRAP		2	/* log wrapped */
#define LOGREADERR	3	/* log read error detected in logredo() */


83/*
84 * log logical page
85 *
86 * (this comment should be rewritten !)
Dave Kleikamp63f83c92006-10-02 09:55:27 -050087 * the header and trailer structures (h,t) will normally have
Linus Torvalds1da177e2005-04-16 15:20:36 -070088 * the same page and eor value.
Dave Kleikamp63f83c92006-10-02 09:55:27 -050089 * An exception to this occurs when a complete page write is not
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 * accomplished on a power failure. Since the hardware may "split write"
Dave Kleikamp63f83c92006-10-02 09:55:27 -050091 * sectors in the page, any out of order sequence may occur during powerfail
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 * and needs to be recognized during log replay. The xor value is
93 * an "exclusive or" of all log words in the page up to eor. This
94 * 32 bit eor is stored with the top 16 bits in the header and the
95 * bottom 16 bits in the trailer. logredo can easily recognize pages
Dave Kleikamp63f83c92006-10-02 09:55:27 -050096 * that were not completed by reconstructing this eor and checking
Linus Torvalds1da177e2005-04-16 15:20:36 -070097 * the log page.
98 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -050099 * Previous versions of the operating system did not allow split
100 * writes and detected partially written records in logredo by
101 * ordering the updates to the header, trailer, and the move of data
102 * into the logdata area. The order: (1) data is moved (2) header
103 * is updated (3) trailer is updated. In logredo, when the header
104 * differed from the trailer, the header and trailer were reconciled
105 * as follows: if h.page != t.page they were set to the smaller of
106 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 * h.eor != t.eor they were set to the smaller of their two values.
108 */
109struct logpage {
110 struct { /* header */
111 __le32 page; /* 4: log sequence page number */
112 __le16 rsrvd; /* 2: */
113 __le16 eor; /* 2: end-of-log offset of lasrt record write */
114 } h;
115
116 __le32 data[LOGPSIZE / 4 - 4]; /* log record area */
117
118 struct { /* trailer */
119 __le32 page; /* 4: normally the same as h.page */
120 __le16 rsrvd; /* 2: */
121 __le16 eor; /* 2: normally the same as h.eor */
122 } t;
123};
124
#define LOGPHDRSIZE	8	/* log page header size */
#define LOGPTLRSIZE	8	/* log page trailer size */


/*
 *	log record
 *
 * (this comment should be rewritten !)
 * jfs uses only "after" log records (only a single writer is allowed
 * in a page, pages are written to temporary paging space if
 * they must be written to disk before commit, and i/o is
 * scheduled for modified pages to their home location after
 * the log records containing the after values and the commit
 * record is written to the log on disk, undo discards the copy
 * in main-memory.)
 *
 * a log record consists of a data area of variable length followed by
 * a descriptor of fixed size LOGRDSIZE bytes.
 * the data area is rounded up to an integral number of 4-bytes and
 * must be no longer than LOGPSIZE.
 * the descriptor is of size of multiple of 4-bytes and aligned on a
 * 4-byte boundary.
 * records are packed one after the other in the data area of log pages.
 * (sometimes a DUMMY record is inserted so that at least one record ends
 * on every page or the longest record is placed on at most two pages).
 * the field eor in page header/trailer points to the byte following
 * the last record on a page.
 */

/* log record types (lrd type field); each value is a distinct bit */
#define LOG_COMMIT		0x8000
#define LOG_SYNCPT		0x4000
#define LOG_MOUNT		0x2000
#define LOG_REDOPAGE		0x0800
#define LOG_NOREDOPAGE		0x0080
#define LOG_NOREDOINOEXT	0x0040
#define LOG_UPDATEMAP		0x0008
#define LOG_NOREDOFILE		0x0001

/* REDOPAGE/NOREDOPAGE log record data type */
#define	LOG_INODE		0x0001
#define	LOG_XTREE		0x0002
#define	LOG_DTREE		0x0004
#define	LOG_BTROOT		0x0010
#define	LOG_EA			0x0020
#define	LOG_ACL			0x0040
#define	LOG_DATA		0x0080
#define	LOG_NEW			0x0100
#define	LOG_EXTEND		0x0200
#define LOG_RELOCATE		0x0400
#define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */

/* UPDATEMAP log record descriptor type */
#define	LOG_ALLOCXADLIST	0x0080
#define	LOG_ALLOCPXDLIST	0x0040
#define	LOG_ALLOCXAD		0x0020
#define	LOG_ALLOCPXD		0x0010
#define	LOG_FREEXADLIST		0x0008
#define	LOG_FREEPXDLIST		0x0004
#define	LOG_FREEXAD		0x0002
#define	LOG_FREEPXD		0x0001


188struct lrd {
189 /*
190 * type independent area
191 */
192 __le32 logtid; /* 4: log transaction identifier */
193 __le32 backchain; /* 4: ptr to prev record of same transaction */
194 __le16 type; /* 2: record type */
195 __le16 length; /* 2: length of data in record (in byte) */
196 __le32 aggregate; /* 4: file system lv/aggregate */
197 /* (16) */
198
199 /*
200 * type dependent area (20)
201 */
202 union {
203
204 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500205 * COMMIT: commit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 *
207 * transaction commit: no type-dependent information;
208 */
209
210 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500211 * REDOPAGE: after-image
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 *
213 * apply after-image;
214 *
215 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
216 */
217 struct {
218 __le32 fileset; /* 4: fileset number */
219 __le32 inode; /* 4: inode number */
220 __le16 type; /* 2: REDOPAGE record type */
221 __le16 l2linesize; /* 2: log2 of line size */
222 pxd_t pxd; /* 8: on-disk page pxd */
223 } redopage; /* (20) */
224
225 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500226 * NOREDOPAGE: the page is freed
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 *
228 * do not apply after-image records which precede this record
229 * in the log with the same page block number to this page.
230 *
231 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
232 */
233 struct {
234 __le32 fileset; /* 4: fileset number */
235 __le32 inode; /* 4: inode number */
236 __le16 type; /* 2: NOREDOPAGE record type */
237 __le16 rsrvd; /* 2: reserved */
238 pxd_t pxd; /* 8: on-disk page pxd */
239 } noredopage; /* (20) */
240
241 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500242 * UPDATEMAP: update block allocation map
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 *
244 * either in-line PXD,
245 * or out-of-line XADLIST;
246 *
247 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
248 */
249 struct {
250 __le32 fileset; /* 4: fileset number */
251 __le32 inode; /* 4: inode number */
252 __le16 type; /* 2: UPDATEMAP record type */
253 __le16 nxd; /* 2: number of extents */
254 pxd_t pxd; /* 8: pxd */
255 } updatemap; /* (20) */
256
257 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500258 * NOREDOINOEXT: the inode extent is freed
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500260 * do not apply after-image records which precede this
261 * record in the log with the any of the 4 page block
262 * numbers in this inode extent.
263 *
264 * NOTE: The fileset and pxd fields MUST remain in
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 * the same fields in the REDOPAGE record format.
266 *
267 */
268 struct {
269 __le32 fileset; /* 4: fileset number */
270 __le32 iagnum; /* 4: IAG number */
271 __le32 inoext_idx; /* 4: inode extent index */
272 pxd_t pxd; /* 8: on-disk page pxd */
273 } noredoinoext; /* (20) */
274
275 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500276 * SYNCPT: log sync point
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 *
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300278 * replay log up to syncpt address specified;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279 */
280 struct {
281 __le32 sync; /* 4: syncpt address (0 = here) */
282 } syncpt;
283
284 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500285 * MOUNT: file system mount
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 *
287 * file system mount: no type-dependent information;
288 */
289
290 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500291 * ? FREEXTENT: free specified extent(s)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 *
293 * free specified extent(s) from block allocation map
294 * N.B.: nextents should be length of data/sizeof(xad_t)
295 */
296 struct {
297 __le32 type; /* 4: FREEXTENT record type */
298 __le32 nextent; /* 4: number of extents */
299
300 /* data: PXD or XAD list */
301 } freextent;
302
303 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500304 * ? NOREDOFILE: this file is freed
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 *
306 * do not apply records which precede this record in the log
307 * with the same inode number.
308 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500309 * NOREDOFILE must be the first to be written at commit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310 * (last to be read in logredo()) - it prevents
311 * replay of preceding updates of all preceding generations
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500312 * of the inumber esp. the on-disk inode itself.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 */
314 struct {
315 __le32 fileset; /* 4: fileset number */
316 __le32 inode; /* 4: inode number */
317 } noredofile;
318
319 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500320 * ? NEWPAGE:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 *
322 * metadata type dependent
323 */
324 struct {
325 __le32 fileset; /* 4: fileset number */
326 __le32 inode; /* 4: inode number */
327 __le32 type; /* 4: NEWPAGE record type */
328 pxd_t pxd; /* 8: on-disk page pxd */
329 } newpage;
330
331 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500332 * ? DUMMY: filler
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 *
334 * no type-dependent information
335 */
336 } log;
337}; /* (36) */
338
339#define LOGRDSIZE (sizeof(struct lrd))
340
341/*
342 * line vector descriptor
343 */
344struct lvd {
345 __le16 offset;
346 __le16 length;
347};
348
349
350/*
351 * log logical volume
352 */
353struct jfs_log {
354
355 struct list_head sb_list;/* This is used to sync metadata
356 * before writing syncpt.
357 */
358 struct list_head journal_list; /* Global list */
359 struct block_device *bdev; /* 4: log lv pointer */
360 int serial; /* 4: log mount serial number */
361
362 s64 base; /* @8: log extent address (inline log ) */
363 int size; /* 4: log size in log page (in page) */
364 int l2bsize; /* 4: log2 of bsize */
365
Al Viro5ba25332007-10-14 19:35:50 +0100366 unsigned long flag; /* 4: flag */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
368 struct lbuf *lbuf_free; /* 4: free lbufs */
369 wait_queue_head_t free_wait; /* 4: */
370
371 /* log write */
372 int logtid; /* 4: log tid */
373 int page; /* 4: page number of eol page */
374 int eor; /* 4: eor of last record in eol page */
375 struct lbuf *bp; /* 4: current log page buffer */
376
Ingo Molnar1de87442006-01-24 15:22:50 -0600377 struct mutex loglock; /* 4: log write serialization lock */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
379 /* syncpt */
380 int nextsync; /* 4: bytes to write before next syncpt */
381 int active; /* 4: */
382 wait_queue_head_t syncwait; /* 4: */
383
384 /* commit */
385 uint cflag; /* 4: */
386 struct list_head cqueue; /* FIFO commit queue */
387 struct tblock *flush_tblk; /* tblk we're waiting on for flush */
388 int gcrtc; /* 4: GC_READY transaction count */
389 struct tblock *gclrt; /* 4: latest GC_READY transaction */
390 spinlock_t gclock; /* 4: group commit lock */
391 int logsize; /* 4: log data area size in byte */
392 int lsn; /* 4: end-of-log */
393 int clsn; /* 4: clsn */
394 int syncpt; /* 4: addr of last syncpt record */
395 int sync; /* 4: addr from last logsync() */
396 struct list_head synclist; /* 8: logsynclist anchor */
397 spinlock_t synclock; /* 4: synclist lock */
398 struct lbuf *wqueue; /* 4: log pageout queue */
399 int count; /* 4: count */
Andy Shevchenko2e3bc612019-01-10 15:41:53 +0200400 uuid_t uuid; /* 16: 128-bit uuid of log device */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401
402 int no_integrity; /* 3: flag to disable journaling to disk */
403};
404
/*
 * Log flag
 *
 * NOTE(review): these are consecutive integers rather than distinct
 * bit masks, so they look like bit numbers within jfs_log.flag rather
 * than values to be OR-ed together - confirm against the users.
 */
#define log_INLINELOG	1
#define log_SYNCBARRIER	2
#define log_QUIESCE	3
#define log_FLUSH	4

/*
 * group commit flag
 */
/* jfs_log */
#define logGC_PAGEOUT	0x00000001

/* tblock/lbuf; each value is a distinct bit */
#define tblkGC_QUEUE		0x0001
#define tblkGC_READY		0x0002
#define tblkGC_COMMIT		0x0004
#define tblkGC_COMMITTED	0x0008
#define tblkGC_EOP		0x0010
#define tblkGC_FREE		0x0020
#define tblkGC_LEADER		0x0040
#define tblkGC_ERROR		0x0080
#define tblkGC_LAZY		0x0100	// D230860
#define tblkGC_UNLOCKED		0x0200	// D230860

431/*
432 * log cache buffer header
433 */
434struct lbuf {
435 struct jfs_log *l_log; /* 4: log associated with buffer */
436
437 /*
438 * data buffer base area
439 */
440 uint l_flag; /* 4: pageout control flags */
441
442 struct lbuf *l_wqnext; /* 4: write queue link */
443 struct lbuf *l_freelist; /* 4: freelistlink */
444
445 int l_pn; /* 4: log page number */
446 int l_eor; /* 4: log record eor */
447 int l_ceor; /* 4: committed log record eor */
448
449 s64 l_blkno; /* 8: log page block number */
450 caddr_t l_ldata; /* 4: data page */
Dave Kleikampdc5798d2005-05-02 12:24:57 -0600451 struct page *l_page; /* The page itself */
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500452 uint l_offset; /* Offset of l_ldata within the page */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453
454 wait_queue_head_t l_ioevent; /* 4: i/o done event */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455};
456
457/* Reuse l_freelist for redrive list */
458#define l_redrive_next l_freelist
459
460/*
461 * logsynclist block
462 *
463 * common logsyncblk prefix for jbuf_t and tblock
464 */
465struct logsyncblk {
466 u16 xflag; /* flags */
467 u16 flag; /* only meaninful in tblock */
468 lid_t lid; /* lock id */
469 s32 lsn; /* log sequence number */
470 struct list_head synclist; /* log sync list link */
471};
472
/*
 *	logsynclist serialization (per log)
 *
 * Thin wrappers around the spinlock API operating on (log)->synclock.
 * LOGSYNC_LOCK stores the saved interrupt state in flags; the same
 * flags variable must be handed to the matching LOGSYNC_UNLOCK.
 */

#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
#define LOGSYNC_UNLOCK(log, flags) \
	spin_unlock_irqrestore(&(log)->synclock, flags)

/*
 * logdiff - compute the difference in bytes of lsn from sync point
 *
 * diff: integer lvalue that receives the result (evaluated several times,
 *       so it must not have side effects)
 * lsn:  log address to measure
 * log:  pointer to an object with syncpt and logsize members
 *
 * Stores lsn - log->syncpt in diff; when that is negative, logsize is
 * added once so the result is non-negative for an lsn that has wrapped
 * around the end of the log.
 *
 * Wrapped in do { } while (0) so that "logdiff(...);" is a single
 * statement and is safe inside an unbraced if/else.
 */
#define	logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)

490extern int lmLogOpen(struct super_block *sb);
491extern int lmLogClose(struct super_block *sb);
492extern int lmLogShutdown(struct jfs_log * log);
493extern int lmLogInit(struct jfs_log * log);
494extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
Dave Kleikamp1868f4a2005-05-04 15:29:35 -0500495extern int lmGroupCommit(struct jfs_log *, struct tblock *);
496extern int jfsIOWait(void *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497extern void jfs_flush_journal(struct jfs_log * log, int wait);
Dave Kleikampcbc3d652005-07-27 09:17:57 -0500498extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499
#endif				/* _H_JFS_LOGMGR */