// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_trace.h"
#include "xfs_sysfs.h"
#include "xfs_sb.h"
#include "xfs_health.h"

kmem_zone_t	*xfs_log_ticket_zone;

/* Local miscellaneous function prototypes */
STATIC int
xlog_commit_record(
	struct xlog		*log,
	struct xlog_ticket	*ticket,
	struct xlog_in_core	**iclog,
	xfs_lsn_t		*commitlsnp);

STATIC struct xlog *
xlog_alloc_log(
	struct xfs_mount	*mp,
	struct xfs_buftarg	*log_target,
	xfs_daddr_t		blk_offset,
	int			num_bblks);
STATIC int
xlog_space_left(
	struct xlog		*log,
	atomic64_t		*head);
STATIC void
xlog_dealloc_log(
	struct xlog		*log);

/* local state machine functions */
STATIC void xlog_state_done_syncing(
	struct xlog_in_core	*iclog,
	bool			aborted);
STATIC int
xlog_state_get_iclog_space(
	struct xlog		*log,
	int			len,
	struct xlog_in_core	**iclog,
	struct xlog_ticket	*ticket,
	int			*continued_write,
	int			*logoffsetp);
STATIC int
xlog_state_release_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog);
STATIC void
xlog_state_switch_iclogs(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			eventual_size);
STATIC void
xlog_state_want_sync(
	struct xlog		*log,
	struct xlog_in_core	*iclog);

STATIC void
xlog_grant_push_ail(
	struct xlog		*log,
	int			need_bytes);
STATIC void
xlog_regrant_reserve_log_space(
	struct xlog		*log,
	struct xlog_ticket	*ticket);
STATIC void
xlog_ungrant_log_space(
	struct xlog		*log,
	struct xlog_ticket	*ticket);

#if defined(DEBUG)
STATIC void
xlog_verify_dest_ptr(
	struct xlog		*log,
	void			*ptr);
STATIC void
xlog_verify_grant_tail(
	struct xlog		*log);
STATIC void
xlog_verify_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			count);
STATIC void
xlog_verify_tail_lsn(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	xfs_lsn_t		tail_lsn);
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c)
#define xlog_verify_tail_lsn(a,b,c)
#endif

STATIC int
xlog_iclogs_empty(
	struct xlog		*log);

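/*
 * The grant heads track log reservation space as a (cycle, bytes) pair
 * packed into a single atomic64_t, so both halves can be sampled and
 * updated together without taking a lock. The update pattern, which the
 * two helpers below implement, is a standard cmpxchg retry loop:
 *
 *	head_val = atomic64_read(head);
 *	do {
 *		xlog_crack_grant_head_val(head_val, &cycle, &space);
 *		< adjust cycle/space, wrapping at log->l_logsize >
 *		old = head_val;
 *		new = xlog_assign_grant_head_val(cycle, space);
 *		head_val = atomic64_cmpxchg(head, old, new);
 *	} while (head_val != old);
 *
 * For example (illustrative values), with l_logsize = 32768, subtracting
 * 200 bytes from (cycle 5, space 100) wraps to (cycle 4, space 32668),
 * mirroring how writes physically wrap around the circular log.
 */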
static void
xlog_grant_sub_space(
	struct xlog		*log,
	atomic64_t		*head,
	int			bytes)
{
	int64_t	head_val = atomic64_read(head);
	int64_t new, old;

	do {
		int	cycle, space;

		xlog_crack_grant_head_val(head_val, &cycle, &space);

		space -= bytes;
		if (space < 0) {
			space += log->l_logsize;
			cycle--;
		}

		old = head_val;
		new = xlog_assign_grant_head_val(cycle, space);
		head_val = atomic64_cmpxchg(head, old, new);
	} while (head_val != old);
}

static void
xlog_grant_add_space(
	struct xlog		*log,
	atomic64_t		*head,
	int			bytes)
{
	int64_t	head_val = atomic64_read(head);
	int64_t new, old;

	do {
		int	tmp;
		int	cycle, space;

		xlog_crack_grant_head_val(head_val, &cycle, &space);

		tmp = log->l_logsize - space;
		if (tmp > bytes)
			space += bytes;
		else {
			space = bytes - tmp;
			cycle++;
		}

		old = head_val;
		new = xlog_assign_grant_head_val(cycle, space);
		head_val = atomic64_cmpxchg(head, old, new);
	} while (head_val != old);
}

STATIC void
xlog_grant_head_init(
	struct xlog_grant_head	*head)
{
	xlog_assign_grant_head(&head->grant, 1, 0);
	INIT_LIST_HEAD(&head->waiters);
	spin_lock_init(&head->lock);
}

STATIC void
xlog_grant_head_wake_all(
	struct xlog_grant_head	*head)
{
	struct xlog_ticket	*tic;

	spin_lock(&head->lock);
	list_for_each_entry(tic, &head->waiters, t_queue)
		wake_up_process(tic->t_task);
	spin_unlock(&head->lock);
}

static inline int
xlog_ticket_reservation(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	struct xlog_ticket	*tic)
{
	if (head == &log->l_write_head) {
		ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
		return tic->t_unit_res;
	} else {
		if (tic->t_flags & XLOG_TIC_PERM_RESERV)
			return tic->t_unit_res * tic->t_cnt;
		else
			return tic->t_unit_res;
	}
}

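/*
 * Wake waiters strictly in FIFO order while there is still enough free
 * space to satisfy each ticket in full. Returns false at the first ticket
 * that cannot be satisfied, telling the caller it must sleep too;
 * *free_bytes is decremented for every woken waiter since that space is
 * already spoken for.
 */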
STATIC bool
xlog_grant_head_wake(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	int			*free_bytes)
{
	struct xlog_ticket	*tic;
	int			need_bytes;

	list_for_each_entry(tic, &head->waiters, t_queue) {
		need_bytes = xlog_ticket_reservation(log, head, tic);
		if (*free_bytes < need_bytes)
			return false;

		*free_bytes -= need_bytes;
		trace_xfs_log_grant_wake_up(log, tic);
		wake_up_process(tic->t_task);
	}

	return true;
}

STATIC int
xlog_grant_head_wait(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	struct xlog_ticket	*tic,
	int			need_bytes) __releases(&head->lock)
					    __acquires(&head->lock)
{
	list_add_tail(&tic->t_queue, &head->waiters);

	do {
		if (XLOG_FORCED_SHUTDOWN(log))
			goto shutdown;
		xlog_grant_push_ail(log, need_bytes);

		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock(&head->lock);

		XFS_STATS_INC(log->l_mp, xs_sleep_logspace);

		trace_xfs_log_grant_sleep(log, tic);
		schedule();
		trace_xfs_log_grant_wake(log, tic);

		spin_lock(&head->lock);
		if (XLOG_FORCED_SHUTDOWN(log))
			goto shutdown;
	} while (xlog_space_left(log, &head->grant) < need_bytes);

	list_del_init(&tic->t_queue);
	return 0;
shutdown:
	list_del_init(&tic->t_queue);
	return -EIO;
}

/*
 * Atomically get the log space required for a log ticket.
 *
 * Once a ticket gets put onto head->waiters, it will only return after the
 * needed reservation is satisfied.
 *
 * This function is structured so that it has a lock free fast path. This is
 * necessary because every new transaction reservation will come through this
 * path. Hence any lock will be globally hot if we take it unconditionally on
 * every pass.
 *
 * As tickets are only ever moved on and off head->waiters under head->lock, we
 * only need to take that lock if we are going to add the ticket to the queue
 * and sleep. We can avoid taking the lock if the ticket was never added to
 * head->waiters because the t_queue list head will be empty and we hold the
 * only reference to it so it can safely be checked unlocked.
 */
STATIC int
xlog_grant_head_check(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	struct xlog_ticket	*tic,
	int			*need_bytes)
{
	int			free_bytes;
	int			error = 0;

	ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));

	/*
	 * If there are other waiters on the queue then give them a chance at
	 * logspace before us. Wake up the first waiters; if we do not wake
	 * up all the waiters, then go to sleep waiting for more free space;
	 * otherwise try to get some space for this transaction.
	 */
	*need_bytes = xlog_ticket_reservation(log, head, tic);
	free_bytes = xlog_space_left(log, &head->grant);
	if (!list_empty_careful(&head->waiters)) {
		spin_lock(&head->lock);
		if (!xlog_grant_head_wake(log, head, &free_bytes) ||
		    free_bytes < *need_bytes) {
			error = xlog_grant_head_wait(log, head, tic,
						     *need_bytes);
		}
		spin_unlock(&head->lock);
	} else if (free_bytes < *need_bytes) {
		spin_lock(&head->lock);
		error = xlog_grant_head_wait(log, head, tic, *need_bytes);
		spin_unlock(&head->lock);
	}

	return error;
}

static void
xlog_tic_reset_res(xlog_ticket_t *tic)
{
	tic->t_res_num = 0;
	tic->t_res_arr_sum = 0;
	tic->t_res_num_ophdrs = 0;
}

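/*
 * Record one logged region in the ticket's fixed-size accounting array.
 * Once XLOG_TIC_LEN_MAX entries have been used, the accumulated sum is
 * folded into the overflow counter and the array restarts from slot zero,
 * so the per-region history is approximate while the byte totals stay
 * exact. This is diagnostic accounting only; it does not affect the
 * reservation itself.
 */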
static void
xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
{
	if (tic->t_res_num == XLOG_TIC_LEN_MAX) {
		/* add to overflow and start again */
		tic->t_res_o_flow += tic->t_res_arr_sum;
		tic->t_res_num = 0;
		tic->t_res_arr_sum = 0;
	}

	tic->t_res_arr[tic->t_res_num].r_len = len;
	tic->t_res_arr[tic->t_res_num].r_type = type;
	tic->t_res_arr_sum += len;
	tic->t_res_num++;
}

/*
 * Replenish the byte reservation required by moving the grant write head.
 */
int
xfs_log_regrant(
	struct xfs_mount	*mp,
	struct xlog_ticket	*tic)
{
	struct xlog		*log = mp->m_log;
	int			need_bytes;
	int			error = 0;

	if (XLOG_FORCED_SHUTDOWN(log))
		return -EIO;

	XFS_STATS_INC(mp, xs_try_logspace);

	/*
	 * This is a new transaction on the ticket, so we need to change the
	 * transaction ID so that the next transaction has a different TID in
	 * the log. Just add one to the existing tid so that we can see chains
	 * of rolling transactions in the log easily.
	 */
	tic->t_tid++;

	xlog_grant_push_ail(log, tic->t_unit_res);

	tic->t_curr_res = tic->t_unit_res;
	xlog_tic_reset_res(tic);

	if (tic->t_cnt > 0)
		return 0;

	trace_xfs_log_regrant(log, tic);

	error = xlog_grant_head_check(log, &log->l_write_head, tic,
				      &need_bytes);
	if (error)
		goto out_error;

	xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
	trace_xfs_log_regrant_exit(log, tic);
	xlog_verify_grant_tail(log);
	return 0;

out_error:
	/*
	 * If we are failing, make sure the ticket doesn't have any current
	 * reservations. We don't want to add this back when the ticket/
	 * transaction gets cancelled.
	 */
	tic->t_curr_res = 0;
	tic->t_cnt = 0;	/* ungrant will give back unit_res * t_cnt. */
	return error;
}
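
/*
 * Note on the early return in xfs_log_regrant() above: a permanent ticket
 * carries a count of prepaid reservation units in t_cnt (decremented on
 * each commit by xlog_regrant_reserve_log_space()), so while t_cnt is
 * still positive a rolling transaction can regrant without touching the
 * grant heads or sleeping for space.
 */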

/*
 * Reserve log space and return a ticket corresponding to the reservation.
 *
 * Each reservation is going to reserve extra space for a log record header.
 * When writes happen to the on-disk log, we don't subtract the length of the
 * log record header from any reservation. By wasting space in each
 * reservation, we prevent over-allocation problems.
 */
int
xfs_log_reserve(
	struct xfs_mount	*mp,
	int			unit_bytes,
	int			cnt,
	struct xlog_ticket	**ticp,
	uint8_t			client,
	bool			permanent)
{
	struct xlog		*log = mp->m_log;
	struct xlog_ticket	*tic;
	int			need_bytes;
	int			error = 0;

	ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);

	if (XLOG_FORCED_SHUTDOWN(log))
		return -EIO;

	XFS_STATS_INC(mp, xs_try_logspace);

	ASSERT(*ticp == NULL);
	tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 0);
	*ticp = tic;

	xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
					    : tic->t_unit_res);

	trace_xfs_log_reserve(log, tic);

	error = xlog_grant_head_check(log, &log->l_reserve_head, tic,
				      &need_bytes);
	if (error)
		goto out_error;

	xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes);
	xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
	trace_xfs_log_reserve_exit(log, tic);
	xlog_verify_grant_tail(log);
	return 0;

out_error:
	/*
	 * If we are failing, make sure the ticket doesn't have any current
	 * reservations. We don't want to add this back when the ticket/
	 * transaction gets cancelled.
	 */
	tic->t_curr_res = 0;
	tic->t_cnt = 0;	/* ungrant will give back unit_res * t_cnt. */
	return error;
}
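
/*
 * Worked example for the grant head updates above, with illustrative
 * numbers: a permanent reservation with unit_bytes = 100k and cnt = 2
 * moves both the reserve head and the write head forward by 200k, since
 * xlog_ticket_reservation() charges all units up front on the reserve
 * head path. A later xfs_log_regrant() of the same ticket moves only the
 * write head, by a single 100k unit.
 */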

/*
 * NOTES:
 *
 * 1. currblock field gets updated at startup and after in-core logs
 *    are marked with WANT_SYNC.
 */

/*
 * This routine is called when a user of a log manager ticket is done with
 * the reservation. If the ticket was ever used, then a commit record for
 * the associated transaction is written out as a log operation header with
 * no data. The flag XLOG_TIC_INITED is set when the first write occurs with
 * a given ticket. If the ticket was one with a permanent reservation, then
 * a few operations are done differently. Permanent reservation tickets by
 * default don't release the reservation. They just commit the current
 * transaction with the belief that the reservation is still needed. A flag
 * must be passed in before permanent reservations are actually released.
 * When these types of tickets are not released, they need to be set into
 * the inited state again. By doing this, a start record will be written
 * out when the next write occurs.
 */
xfs_lsn_t
xfs_log_done(
	struct xfs_mount	*mp,
	struct xlog_ticket	*ticket,
	struct xlog_in_core	**iclog,
	bool			regrant)
{
	struct xlog		*log = mp->m_log;
	xfs_lsn_t		lsn = 0;

	if (XLOG_FORCED_SHUTDOWN(log) ||
	    /*
	     * If nothing was ever written, don't write out commit record.
	     * If we get an error, just continue and give back the log ticket.
	     */
	    (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
	     (xlog_commit_record(log, ticket, iclog, &lsn)))) {
		lsn = (xfs_lsn_t) -1;
		regrant = false;
	}


	if (!regrant) {
		trace_xfs_log_done_nonperm(log, ticket);

		/*
		 * Release ticket if not permanent reservation or a specific
		 * request has been made to release a permanent reservation.
		 */
		xlog_ungrant_log_space(log, ticket);
	} else {
		trace_xfs_log_done_perm(log, ticket);

		xlog_regrant_reserve_log_space(log, ticket);
		/* If this ticket was a permanent reservation and we aren't
		 * trying to release it, reset the inited flags; so next time
		 * we write, a start record will be written out.
		 */
		ticket->t_flags |= XLOG_TIC_INITED;
	}

	xfs_log_ticket_put(ticket);
	return lsn;
}

int
xfs_log_release_iclog(
	struct xfs_mount	*mp,
	struct xlog_in_core	*iclog)
{
	if (xlog_state_release_iclog(mp->m_log, iclog)) {
		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
		return -EIO;
	}

	return 0;
}

/*
 * Mount a log filesystem
 *
 * mp		- ubiquitous xfs mount point structure
 * log_target	- buftarg of on-disk log device
 * blk_offset	- Start block # where block size is 512 bytes (BBSIZE)
 * num_bblks	- Number of BBSIZE blocks in on-disk log
 *
 * Return error or zero.
 */
int
xfs_log_mount(
	xfs_mount_t	*mp,
	xfs_buftarg_t	*log_target,
	xfs_daddr_t	blk_offset,
	int		num_bblks)
{
	bool		fatal = xfs_sb_version_hascrc(&mp->m_sb);
	int		error = 0;
	int		min_logfsbs;

	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
		xfs_notice(mp, "Mounting V%d Filesystem",
			   XFS_SB_VERSION_NUM(&mp->m_sb));
	} else {
		xfs_notice(mp,
"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
			   XFS_SB_VERSION_NUM(&mp->m_sb));
		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
	}

	mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
	if (IS_ERR(mp->m_log)) {
		error = PTR_ERR(mp->m_log);
		goto out;
	}

	/*
	 * Validate the given log space and drop a critical message via syslog
	 * if the log size is too small, since that would lead to unexpected
	 * situations during the transaction log space reservation stage.
	 *
	 * Note: we can't just reject the mount if the validation fails. This
	 * would mean that people would have to downgrade their kernel just to
	 * remedy the situation as there is no way to grow the log (short of
	 * black magic surgery with xfs_db).
	 *
	 * We can, however, reject mounts for CRC format filesystems, as the
	 * mkfs binary being used to make the filesystem should never create a
	 * filesystem with a log that is too small.
	 */
	min_logfsbs = xfs_log_calc_minimum_size(mp);

	if (mp->m_sb.sb_logblocks < min_logfsbs) {
		xfs_warn(mp,
		"Log size %d blocks too small, minimum size is %d blocks",
			 mp->m_sb.sb_logblocks, min_logfsbs);
		error = -EINVAL;
	} else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
		xfs_warn(mp,
		"Log size %d blocks too large, maximum size is %lld blocks",
			 mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
		error = -EINVAL;
	} else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
		xfs_warn(mp,
		"log size %lld bytes too large, maximum size is %lld bytes",
			 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
			 XFS_MAX_LOG_BYTES);
		error = -EINVAL;
	} else if (mp->m_sb.sb_logsunit > 1 &&
		   mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
		xfs_warn(mp,
		"log stripe unit %u bytes must be a multiple of block size",
			 mp->m_sb.sb_logsunit);
		error = -EINVAL;
		fatal = true;
	}
	if (error) {
		/*
		 * Log check errors are always fatal on v5; or whenever bad
		 * metadata leads to a crash.
		 */
		if (fatal) {
			xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
			ASSERT(0);
			goto out_free_log;
		}
		xfs_crit(mp, "Log size out of supported range.");
		xfs_crit(mp,
"Continuing onwards, but if log hangs are experienced then please report this message in the bug report.");
	}

	/*
	 * Initialize the AIL now we have a log.
	 */
	error = xfs_trans_ail_init(mp);
	if (error) {
		xfs_warn(mp, "AIL initialisation failed: error %d", error);
		goto out_free_log;
	}
	mp->m_log->l_ailp = mp->m_ail;

	/*
	 * skip log recovery on a norecovery mount.  pretend it all
	 * just worked.
	 */
	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
		int	readonly = (mp->m_flags & XFS_MOUNT_RDONLY);

		if (readonly)
			mp->m_flags &= ~XFS_MOUNT_RDONLY;

		error = xlog_recover(mp->m_log);

		if (readonly)
			mp->m_flags |= XFS_MOUNT_RDONLY;
		if (error) {
			xfs_warn(mp, "log mount/recovery failed: error %d",
				error);
			xlog_recover_cancel(mp->m_log);
			goto out_destroy_ail;
		}
	}

	error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
			       "log");
	if (error)
		goto out_destroy_ail;

	/* Normal transactions can now occur */
	mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;

	/*
	 * Now that the log has been fully initialised and we know where our
	 * space grant counters are, we can initialise the permanent ticket
	 * needed for delayed logging to work.
	 */
	xlog_cil_init_post_recovery(mp->m_log);

	return 0;

out_destroy_ail:
	xfs_trans_ail_destroy(mp);
out_free_log:
	xlog_dealloc_log(mp->m_log);
out:
	return error;
}

/*
 * Finish the recovery of the file system.  This is separate from the
 * xfs_log_mount() call, because it depends on the code in xfs_mountfs() to read
 * in the root and real-time bitmap inodes between calling xfs_log_mount() and
 * here.
 *
 * If we finish recovery successfully, start the background log work. If we are
 * not doing recovery, then we have a RO filesystem and we don't need to start
 * it.
 */
int
xfs_log_mount_finish(
	struct xfs_mount	*mp)
{
	int	error = 0;
	bool	readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
	bool	recovered = mp->m_log->l_flags & XLOG_RECOVERY_NEEDED;

	if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
		return 0;
	} else if (readonly) {
		/* Allow unlinked processing to proceed */
		mp->m_flags &= ~XFS_MOUNT_RDONLY;
	}

	/*
	 * During the second phase of log recovery, we need iget and
	 * iput to behave like they do for an active filesystem.
	 * xfs_fs_drop_inode needs to be able to prevent the deletion
	 * of inodes before we're done replaying log items on those
	 * inodes.  Turn it off immediately after recovery finishes
	 * so that we don't leak the quota inodes if subsequent mount
	 * activities fail.
	 *
	 * We let all inodes involved in redo item processing end up on
	 * the LRU instead of being evicted immediately so that if we do
	 * something to an unlinked inode, the irele won't cause
	 * premature truncation and freeing of the inode, which results
	 * in log recovery failure.  We have to evict the unreferenced
	 * lru inodes after clearing SB_ACTIVE because we don't
	 * otherwise clean up the lru if there's a subsequent failure in
	 * xfs_mountfs, which leads to us leaking the inodes if nothing
	 * else (e.g. quotacheck) references the inodes before the
	 * mount failure occurs.
	 */
	mp->m_super->s_flags |= SB_ACTIVE;
	error = xlog_recover_finish(mp->m_log);
	if (!error)
		xfs_log_work_queue(mp);
	mp->m_super->s_flags &= ~SB_ACTIVE;
	evict_inodes(mp->m_super);

	/*
	 * Drain the buffer LRU after log recovery. This is required for v4
	 * filesystems to avoid leaving around buffers with NULL verifier ops,
	 * but we do it unconditionally to make sure we're always in a clean
	 * cache state after mount.
	 *
	 * Don't push in the error case because the AIL may have pending intents
	 * that aren't removed until recovery is cancelled.
	 */
	if (!error && recovered) {
		xfs_log_force(mp, XFS_LOG_SYNC);
		xfs_ail_push_all_sync(mp->m_ail);
	}
	xfs_wait_buftarg(mp->m_ddev_targp);

	if (readonly)
		mp->m_flags |= XFS_MOUNT_RDONLY;

	return error;
}

/*
 * The mount has failed. Cancel the recovery if it hasn't completed and destroy
 * the log.
 */
void
xfs_log_mount_cancel(
	struct xfs_mount	*mp)
{
	xlog_recover_cancel(mp->m_log);
	xfs_log_unmount(mp);
}

/*
 * Final log writes as part of unmount.
 *
 * Mark the filesystem clean as unmount happens.  Note that during relocation
 * this routine needs to be executed as part of source-bag while the
 * deallocation must not be done until source-end.
 */

/* Actually write the unmount record to disk. */
static void
xfs_log_write_unmount_record(
	struct xfs_mount	*mp)
{
	/* the data section must be 32 bit size aligned */
	struct xfs_unmount_log_format magic = {
		.magic = XLOG_UNMOUNT_TYPE,
	};
	struct xfs_log_iovec reg = {
		.i_addr = &magic,
		.i_len = sizeof(magic),
		.i_type = XLOG_REG_TYPE_UNMOUNT,
	};
	struct xfs_log_vec vec = {
		.lv_niovecs = 1,
		.lv_iovecp = &reg,
	};
	struct xlog		*log = mp->m_log;
	struct xlog_in_core	*iclog;
	struct xlog_ticket	*tic = NULL;
	xfs_lsn_t		lsn;
	uint			flags = XLOG_UNMOUNT_TRANS;
	int			error;

	error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
	if (error)
		goto out_err;

	/*
	 * If we think the summary counters are bad, clear the unmount header
	 * flag in the unmount record so that the summary counters will be
	 * recalculated during log recovery at next mount.  Refer to
	 * xlog_check_unmount_rec for more details.
	 */
	if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
			XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
		xfs_alert(mp, "%s: will fix summary counters at next mount",
				__func__);
		flags &= ~XLOG_UNMOUNT_TRANS;
	}

	/* remove inited flag, and account for space used */
	tic->t_flags = 0;
	tic->t_curr_res -= sizeof(magic);
	error = xlog_write(log, &vec, tic, &lsn, NULL, flags);
	/*
	 * At this point, we're umounting anyway, so there's no point in
	 * transitioning log state to IOERROR. Just continue...
	 */
out_err:
	if (error)
		xfs_alert(mp, "%s: unmount record failed", __func__);

	spin_lock(&log->l_icloglock);
	iclog = log->l_iclog;
	atomic_inc(&iclog->ic_refcnt);
	xlog_state_want_sync(log, iclog);
	spin_unlock(&log->l_icloglock);
	error = xlog_state_release_iclog(log, iclog);

	spin_lock(&log->l_icloglock);
	switch (iclog->ic_state) {
	default:
		if (!XLOG_FORCED_SHUTDOWN(log)) {
			xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
			break;
		}
		/* fall through */
	case XLOG_STATE_ACTIVE:
	case XLOG_STATE_DIRTY:
		spin_unlock(&log->l_icloglock);
		break;
	}

	if (tic) {
		trace_xfs_log_umount_write(log, tic);
		xlog_ungrant_log_space(log, tic);
		xfs_log_ticket_put(tic);
	}
}
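
/*
 * The 600 byte reservation above is far larger than the unmount record
 * itself (a single small iovec); the slack presumably covers the log
 * record and operation headers that xlog_write() wraps around the payload.
 */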

/*
 * Unmount record used to have a string "Unmount filesystem--" in the
 * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
 * We just write the magic number now since that particular field isn't
 * currently architecture converted and "Unmount" is a bit foo.
 * As far as I know, there weren't any dependencies on the old behaviour.
 */

static int
xfs_log_unmount_write(xfs_mount_t *mp)
{
	struct xlog	 *log = mp->m_log;
	xlog_in_core_t	 *iclog;
#ifdef DEBUG
	xlog_in_core_t	 *first_iclog;
#endif
	int		 error;

	/*
	 * Don't write out an unmount record on norecovery mounts or ro
	 * devices, or if we are doing a forced umount (typically because of
	 * IO errors).
	 */
	if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
	    xfs_readonly_buftarg(log->l_targ)) {
		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
		return 0;
	}

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));

#ifdef DEBUG
	first_iclog = iclog = log->l_iclog;
	do {
		if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
			ASSERT(iclog->ic_state & XLOG_STATE_ACTIVE);
			ASSERT(iclog->ic_offset == 0);
		}
		iclog = iclog->ic_next;
	} while (iclog != first_iclog);
#endif
	if (! (XLOG_FORCED_SHUTDOWN(log))) {
		xfs_log_write_unmount_record(mp);
	} else {
		/*
		 * We're already in forced_shutdown mode, couldn't
		 * even attempt to write out the unmount transaction.
		 *
		 * Go through the motions of sync'ing and releasing
		 * the iclog, even though no I/O will actually happen,
		 * we need to wait for other log I/Os that may already
		 * be in progress.  Do this as a separate section of
		 * code so we'll know if we ever get stuck here that
		 * we're in this odd situation of trying to unmount
		 * a file system that went into forced_shutdown as
		 * the result of an unmount..
		 */
		spin_lock(&log->l_icloglock);
		iclog = log->l_iclog;
		atomic_inc(&iclog->ic_refcnt);

		xlog_state_want_sync(log, iclog);
		spin_unlock(&log->l_icloglock);
		error = xlog_state_release_iclog(log, iclog);

		spin_lock(&log->l_icloglock);

		if ( ! (   iclog->ic_state == XLOG_STATE_ACTIVE
			|| iclog->ic_state == XLOG_STATE_DIRTY
			|| iclog->ic_state == XLOG_STATE_IOERROR) ) {

				xlog_wait(&iclog->ic_force_wait,
							&log->l_icloglock);
		} else {
			spin_unlock(&log->l_icloglock);
		}
	}

	return error;
}	/* xfs_log_unmount_write */

/*
 * Empty the log for unmount/freeze.
 *
 * To do this, we first need to shut down the background log work so it is not
 * trying to cover the log as we clean up. We then need to unpin all objects in
 * the log so we can then flush them out. Once they have completed their IO and
 * run the callbacks removing themselves from the AIL, we can write the unmount
 * record.
 */
void
xfs_log_quiesce(
	struct xfs_mount	*mp)
{
	cancel_delayed_work_sync(&mp->m_log->l_work);
	xfs_log_force(mp, XFS_LOG_SYNC);

	/*
	 * The superblock buffer is uncached and while xfs_ail_push_all_sync()
	 * will push it, xfs_wait_buftarg() will not wait for it. Further,
	 * xfs_buf_iowait() cannot be used because it was pushed with the
	 * XBF_ASYNC flag set, so we need to use a lock/unlock pair to wait for
	 * the IO to complete.
	 */
	xfs_ail_push_all_sync(mp->m_ail);
	xfs_wait_buftarg(mp->m_ddev_targp);
	xfs_buf_lock(mp->m_sb_bp);
	xfs_buf_unlock(mp->m_sb_bp);

	xfs_log_unmount_write(mp);
}

/*
 * Shut down and release the AIL and Log.
 *
 * During unmount, we need to ensure we flush all the dirty metadata objects
 * from the AIL so that the log is empty before we write the unmount record to
 * the log. Once this is done, we can tear down the AIL and the log.
 */
void
xfs_log_unmount(
	struct xfs_mount	*mp)
{
	xfs_log_quiesce(mp);

	xfs_trans_ail_destroy(mp);

	xfs_sysfs_del(&mp->m_log->l_kobj);

	xlog_dealloc_log(mp->m_log);
}

void
xfs_log_item_init(
	struct xfs_mount	*mp,
	struct xfs_log_item	*item,
	int			type,
	const struct xfs_item_ops *ops)
{
	item->li_mountp = mp;
	item->li_ailp = mp->m_ail;
	item->li_type = type;
	item->li_ops = ops;
	item->li_lv = NULL;

	INIT_LIST_HEAD(&item->li_ail);
	INIT_LIST_HEAD(&item->li_cil);
	INIT_LIST_HEAD(&item->li_bio_list);
	INIT_LIST_HEAD(&item->li_trans);
}

/*
 * Wake up processes waiting for log space after we have moved the log tail.
 */
void
xfs_log_space_wake(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;
	int			free_bytes;

	if (XLOG_FORCED_SHUTDOWN(log))
		return;

	if (!list_empty_careful(&log->l_write_head.waiters)) {
		ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));

		spin_lock(&log->l_write_head.lock);
		free_bytes = xlog_space_left(log, &log->l_write_head.grant);
		xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
		spin_unlock(&log->l_write_head.lock);
	}

	if (!list_empty_careful(&log->l_reserve_head.waiters)) {
		ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));

		spin_lock(&log->l_reserve_head.lock);
		free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
		xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
		spin_unlock(&log->l_reserve_head.lock);
	}
}

/*
 * Determine if we have a transaction that has gone to disk that needs to be
 * covered. To begin the transition to the idle state firstly the log needs to
 * be idle. That means the CIL, the AIL and the iclogs need to be empty before
 * we start attempting to cover the log.
 *
 * Only if we are then in a state where covering is needed is the caller
 * informed that dummy transactions are required to move the log into the idle
 * state.
 *
 * If there are any items in the AIL or CIL, then we do not want to attempt to
 * cover the log as we may be in a situation where there isn't log space
 * available to run a dummy transaction and this can lead to deadlocks when the
 * tail of the log is pinned by an item that is modified in the CIL. Hence
 * there's no point in running a dummy transaction at this point because we
 * can't start trying to idle the log until both the CIL and AIL are empty.
 */
static int
xfs_log_need_covered(xfs_mount_t *mp)
{
	struct xlog	*log = mp->m_log;
	int		needed = 0;

	if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
		return 0;

	if (!xlog_cil_empty(log))
		return 0;

	spin_lock(&log->l_icloglock);
	switch (log->l_covered_state) {
	case XLOG_STATE_COVER_DONE:
	case XLOG_STATE_COVER_DONE2:
	case XLOG_STATE_COVER_IDLE:
		break;
	case XLOG_STATE_COVER_NEED:
	case XLOG_STATE_COVER_NEED2:
		if (xfs_ail_min_lsn(log->l_ailp))
			break;
		if (!xlog_iclogs_empty(log))
			break;

		needed = 1;
		if (log->l_covered_state == XLOG_STATE_COVER_NEED)
			log->l_covered_state = XLOG_STATE_COVER_DONE;
		else
			log->l_covered_state = XLOG_STATE_COVER_DONE2;
		break;
	default:
		needed = 1;
		break;
	}
	spin_unlock(&log->l_icloglock);
	return needed;
}

/*
 * We may be holding the log iclog lock upon entering this routine.
 */
xfs_lsn_t
xlog_assign_tail_lsn_locked(
	struct xfs_mount	*mp)
{
	struct xlog		*log = mp->m_log;
	struct xfs_log_item	*lip;
	xfs_lsn_t		tail_lsn;

	assert_spin_locked(&mp->m_ail->ail_lock);

	/*
	 * To make sure we always have a valid LSN for the log tail we keep
	 * track of the last LSN which was committed in log->l_last_sync_lsn,
	 * and use that when the AIL is empty.
	 */
	lip = xfs_ail_min(mp->m_ail);
	if (lip)
		tail_lsn = lip->li_lsn;
	else
		tail_lsn = atomic64_read(&log->l_last_sync_lsn);
	trace_xfs_log_assign_tail_lsn(log, tail_lsn);
	atomic64_set(&log->l_tail_lsn, tail_lsn);
	return tail_lsn;
}

xfs_lsn_t
xlog_assign_tail_lsn(
	struct xfs_mount	*mp)
{
	xfs_lsn_t		tail_lsn;

	spin_lock(&mp->m_ail->ail_lock);
	tail_lsn = xlog_assign_tail_lsn_locked(mp);
	spin_unlock(&mp->m_ail->ail_lock);

	return tail_lsn;
}

/*
 * Return the space in the log between the tail and the head.  The head
 * is passed in the cycle/bytes formal parms.  In the special case where
 * the reserve head has wrapped past the tail, this calculation is no
 * longer valid.  In this case, just return 0 which means there is no space
 * in the log.  This works for all places where this function is called
 * with the reserve head.  Of course, if the write head were to ever
 * wrap the tail, we should blow up.  Rather than catch this case here,
 * we depend on other ASSERTions in other parts of the code.   XXXmiken
 *
 * This code also handles the case where the reservation head is behind
 * the tail.  The details of this case are described below, but the end
 * result is that we return the size of the log as the amount of space left.
 */
STATIC int
xlog_space_left(
	struct xlog	*log,
	atomic64_t	*head)
{
	int		free_bytes;
	int		tail_bytes;
	int		tail_cycle;
	int		head_cycle;
	int		head_bytes;

	xlog_crack_grant_head(head, &head_cycle, &head_bytes);
	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
	tail_bytes = BBTOB(tail_bytes);
	if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
		free_bytes = log->l_logsize - (head_bytes - tail_bytes);
	else if (tail_cycle + 1 < head_cycle)
		return 0;
	else if (tail_cycle < head_cycle) {
		ASSERT(tail_cycle == (head_cycle - 1));
		free_bytes = tail_bytes - head_bytes;
	} else {
		/*
		 * The reservation head is behind the tail.
		 * In this case we just want to return the size of the
		 * log as the amount of space left.
		 */
		xfs_alert(log->l_mp, "xlog_space_left: head behind tail");
		xfs_alert(log->l_mp,
			  "  tail_cycle = %d, tail_bytes = %d",
			  tail_cycle, tail_bytes);
		xfs_alert(log->l_mp,
			  "  GH   cycle = %d, GH   bytes = %d",
			  head_cycle, head_bytes);
		ASSERT(0);
		free_bytes = log->l_logsize;
	}
	return free_bytes;
}
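
/*
 * Worked example for the normal cases above, assuming l_logsize = 32768:
 * a tail at (cycle 3, byte 8192) and a head at (cycle 3, byte 20480)
 * leaves 32768 - (20480 - 8192) = 20480 bytes free; with the head one
 * cycle ahead at (cycle 4, byte 4096), the free space is simply
 * 8192 - 4096 = 4096 bytes, the gap between head and tail within one
 * log's worth of space.
 */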


static void
xlog_ioend_work(
	struct work_struct	*work)
{
	struct xlog_in_core	*iclog =
		container_of(work, struct xlog_in_core, ic_end_io_work);
	struct xlog		*log = iclog->ic_log;
	bool			aborted = false;
	int			error;

	error = blk_status_to_errno(iclog->ic_bio.bi_status);
#ifdef DEBUG
	/* treat writes with injected CRC errors as failed */
	if (iclog->ic_fail_crc)
		error = -EIO;
#endif

	/*
	 * Race to shutdown the filesystem if we see an error.
	 */
	if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
		xfs_alert(log->l_mp, "log I/O error %d", error);
		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
		/*
		 * This flag will be propagated to the trans-committed
		 * callback routines to let them know that the log-commit
		 * didn't succeed.
		 */
		aborted = true;
	} else if (iclog->ic_state & XLOG_STATE_IOERROR) {
		aborted = true;
	}

	xlog_state_done_syncing(iclog, aborted);
	bio_uninit(&iclog->ic_bio);

	/*
	 * Drop the lock to signal that we are done. Nothing references the
	 * iclog after this, so an unmount waiting on this lock can now tear it
	 * down safely. As such, it is unsafe to reference the iclog after the
	 * unlock as we could race with it being freed.
	 */
	up(&iclog->ic_sema);
}

Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 * Return size of each in-core log record buffer.
1248 *
Malcolm Parsons9da096f2009-03-29 09:55:42 +02001249 * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 *
1251 * If the filesystem blocksize is too large, we may need to choose a
1252 * larger size since the directory code currently logs entire blocks.
1253 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254STATIC void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001255xlog_get_iclog_buffer_size(
1256 struct xfs_mount *mp,
1257 struct xlog *log)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258{
Eric Sandeen1cb51252007-08-16 16:24:43 +10001259 if (mp->m_logbufs <= 0)
Christoph Hellwig4f622822019-06-28 19:27:20 -07001260 mp->m_logbufs = XLOG_MAX_ICLOGS;
1261 if (mp->m_logbsize <= 0)
1262 mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;
1263
1264 log->l_iclog_bufs = mp->m_logbufs;
1265 log->l_iclog_size = mp->m_logbsize;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266
1267 /*
Christoph Hellwig4f622822019-06-28 19:27:20 -07001268 * # headers = size / 32k - one header holds cycles from 32k of data.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269 */
Christoph Hellwig4f622822019-06-28 19:27:20 -07001270 log->l_iclog_heads =
1271 DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
1272 log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
1273}
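/*
 * Worked example (assumed mount options, not the defaults set above): with
 * logbsize=256k, each iclog needs DIV_ROUND_UP(262144, 32768) = 8 headers
 * because one header covers the cycle data of 32k of payload, giving
 * l_iclog_hsize = 8 << BBSHIFT = 4096 bytes of header space per iclog.
 */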
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274
Dave Chinnerf661f1e2012-10-08 21:56:02 +11001275void
1276xfs_log_work_queue(
1277 struct xfs_mount *mp)
1278{
Brian Foster696a5622017-03-28 14:51:44 -07001279 queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work,
Dave Chinnerf661f1e2012-10-08 21:56:02 +11001280 msecs_to_jiffies(xfs_syncd_centisecs * 10));
1281}
1282
1283/*
1284 * Every sync period we need to unpin all items in the AIL and push them to
1285 * disk. If there is nothing dirty, then we might need to cover the log to
1286 * indicate that the filesystem is idle.
1287 */
Eric Sandeen0d5a75e2016-06-01 17:38:15 +10001288static void
Dave Chinnerf661f1e2012-10-08 21:56:02 +11001289xfs_log_worker(
1290 struct work_struct *work)
1291{
1292 struct xlog *log = container_of(to_delayed_work(work),
1293 struct xlog, l_work);
1294 struct xfs_mount *mp = log->l_mp;
1295
1296 /* dgc: errors ignored - not fatal and nowhere to report them */
Dave Chinner61e63ec2015-01-22 09:10:31 +11001297 if (xfs_log_need_covered(mp)) {
1298 /*
1299 * Dump a transaction into the log that contains no real change.
1300 * This is needed to stamp the current tail LSN into the log
1301 * during the covering operation.
1302 *
1303 * We cannot use an inode here for this - that will push dirty
1304 * state back up into the VFS and then periodic inode flushing
1305 * will prevent log covering from making progress. Hence we
1306 * synchronously log the superblock instead to ensure the
1307 * superblock is immediately unpinned and can be written back.
1308 */
1309 xfs_sync_sb(mp, true);
1310 } else
Dave Chinnerf661f1e2012-10-08 21:56:02 +11001311 xfs_log_force(mp, 0);
1312
1313 /* start pushing all the metadata that is currently dirty */
1314 xfs_ail_push_all(mp->m_ail);
1315
1316 /* queue us up again */
1317 xfs_log_work_queue(mp);
1318}
1319
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320/*
1321 * This routine initializes some of the log structure for a given mount point.
1322 * Its primary purpose is to fill in enough, so recovery can occur. However,
1323 * some other stuff may be filled in too.
1324 */
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001325STATIC struct xlog *
1326xlog_alloc_log(
1327 struct xfs_mount *mp,
1328 struct xfs_buftarg *log_target,
1329 xfs_daddr_t blk_offset,
1330 int num_bblks)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331{
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001332 struct xlog *log;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333 xlog_rec_header_t *head;
1334 xlog_in_core_t **iclogp;
1335 xlog_in_core_t *iclog, *prev_iclog=NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 int i;
Dave Chinner24513372014-06-25 14:58:08 +10001337 int error = -ENOMEM;
Alex Elder69ce58f2010-04-20 17:09:59 +10001338 uint log2_size = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001340 log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
Dave Chinnera6cb7672009-04-06 18:39:27 +02001341 if (!log) {
Dave Chinnera0fa2b62011-03-07 10:01:35 +11001342 xfs_warn(mp, "Log allocation failed: No memory!");
Dave Chinnera6cb7672009-04-06 18:39:27 +02001343 goto out;
1344 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345
1346 log->l_mp = mp;
1347 log->l_targ = log_target;
1348 log->l_logsize = BBTOB(num_bblks);
1349 log->l_logBBstart = blk_offset;
1350 log->l_logBBsize = num_bblks;
1351 log->l_covered_state = XLOG_STATE_COVER_IDLE;
1352 log->l_flags |= XLOG_ACTIVE_RECOVERY;
Dave Chinnerf661f1e2012-10-08 21:56:02 +11001353 INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354
1355 log->l_prev_block = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
Dave Chinner1c3cb9e2010-12-21 12:28:39 +11001357 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
1358 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
Christoph Hellwigc303c5b2012-02-20 02:31:26 +00001360
1361 xlog_grant_head_init(&log->l_reserve_head);
1362 xlog_grant_head_init(&log->l_write_head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363
Dave Chinner24513372014-06-25 14:58:08 +10001364 error = -EFSCORRUPTED;
Eric Sandeen62118702008-03-06 13:44:28 +11001365 if (xfs_sb_version_hassector(&mp->m_sb)) {
Alex Elder69ce58f2010-04-20 17:09:59 +10001366 log2_size = mp->m_sb.sb_logsectlog;
1367 if (log2_size < BBSHIFT) {
Dave Chinnera0fa2b62011-03-07 10:01:35 +11001368 xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)",
1369 log2_size, BBSHIFT);
Alex Elder69ce58f2010-04-20 17:09:59 +10001370 goto out_free_log;
1371 }
1372
1373 log2_size -= BBSHIFT;
1374 if (log2_size > mp->m_sectbb_log) {
Dave Chinnera0fa2b62011-03-07 10:01:35 +11001375 xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)",
1376 log2_size, mp->m_sectbb_log);
Dave Chinnera6cb7672009-04-06 18:39:27 +02001377 goto out_free_log;
1378 }
1379
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 /* for larger sector sizes, must have v2 or external log */
Alex Elder69ce58f2010-04-20 17:09:59 +10001381 if (log2_size && log->l_logBBstart > 0 &&
1382 !xfs_sb_version_haslogv2(&mp->m_sb)) {
Dave Chinnera0fa2b62011-03-07 10:01:35 +11001383 xfs_warn(mp,
1384 "log sector size (0x%x) invalid for configuration.",
1385 log2_size);
Dave Chinnera6cb7672009-04-06 18:39:27 +02001386 goto out_free_log;
1387 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 }
Alex Elder69ce58f2010-04-20 17:09:59 +10001389 log->l_sectBBsize = 1 << log2_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390
1391 xlog_get_iclog_buffer_size(mp, log);
1392
Eric Sandeen007c61c2007-10-11 17:43:56 +10001393 spin_lock_init(&log->l_icloglock);
Dave Chinnereb40a872010-12-21 12:09:01 +11001394 init_waitqueue_head(&log->l_flush_wait);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396 iclogp = &log->l_iclog;
1397 /*
1398 * The amount of memory to allocate for the iclog structure is
1399 * rather funky due to the way the structure is defined. It is
1400 * done this way so that we can use different sizes for machines
1401 * with different amounts of memory. See the definition of
1402 * xlog_in_core_t in xfs_log_priv.h for details.
1403 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 ASSERT(log->l_iclog_size >= 4096);
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001405 for (i = 0; i < log->l_iclog_bufs; i++) {
Christoph Hellwig89b171a2019-06-28 19:31:36 -07001406 size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
1407 sizeof(struct bio_vec);
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001408
1409 iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
1410 if (!iclog)
Dave Chinner644c3562008-11-10 16:50:24 +11001411 goto out_free_iclog;
1412
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001413 *iclogp = iclog;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414 iclog->ic_prev = prev_iclog;
1415 prev_iclog = iclog;
Christoph Hellwig1fa40b02007-05-14 18:23:50 +10001416
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001417 iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
1418 KM_MAYFAIL);
1419 if (!iclog->ic_data)
Dave Chinner644c3562008-11-10 16:50:24 +11001420 goto out_free_iclog;
David Chinner4679b2d2008-04-10 12:18:54 +10001421#ifdef DEBUG
Christoph Hellwig5809d5e2015-06-22 09:44:47 +10001422 log->l_iclog_bak[i] = &iclog->ic_header;
David Chinner4679b2d2008-04-10 12:18:54 +10001423#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424 head = &iclog->ic_header;
1425 memset(head, 0, sizeof(xlog_rec_header_t));
Christoph Hellwigb53e6752007-10-12 10:59:34 +10001426 head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
1427 head->h_version = cpu_to_be32(
Eric Sandeen62118702008-03-06 13:44:28 +11001428 xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
Christoph Hellwigb53e6752007-10-12 10:59:34 +10001429 head->h_size = cpu_to_be32(log->l_iclog_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 /* new fields */
Christoph Hellwigb53e6752007-10-12 10:59:34 +10001431 head->h_fmt = cpu_to_be32(XLOG_FMT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
1433
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001434 iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435 iclog->ic_state = XLOG_STATE_ACTIVE;
1436 iclog->ic_log = log;
David Chinner114d23a2008-04-10 12:18:39 +10001437 atomic_set(&iclog->ic_refcnt, 0);
1438 spin_lock_init(&iclog->ic_callback_lock);
Christoph Hellwig89ae3792019-06-28 19:27:34 -07001439 INIT_LIST_HEAD(&iclog->ic_callbacks);
Christoph Hellwigb28708d2008-11-28 14:23:38 +11001440 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441
Dave Chinnereb40a872010-12-21 12:09:01 +11001442 init_waitqueue_head(&iclog->ic_force_wait);
1443 init_waitqueue_head(&iclog->ic_write_wait);
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001444 INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
1445 sema_init(&iclog->ic_sema, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446
1447 iclogp = &iclog->ic_next;
1448 }
1449 *iclogp = log->l_iclog; /* complete ring */
1450 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
1451
Christoph Hellwig1058d0f2019-06-28 19:27:25 -07001452 log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
1453 WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
1454 mp->m_fsname);
1455 if (!log->l_ioend_workqueue)
1456 goto out_free_iclog;
1457
Dave Chinner71e330b2010-05-21 14:37:18 +10001458 error = xlog_cil_init(log);
1459 if (error)
Christoph Hellwig1058d0f2019-06-28 19:27:25 -07001460 goto out_destroy_workqueue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001461 return log;
Dave Chinner644c3562008-11-10 16:50:24 +11001462
Christoph Hellwig1058d0f2019-06-28 19:27:25 -07001463out_destroy_workqueue:
1464 destroy_workqueue(log->l_ioend_workqueue);
Dave Chinner644c3562008-11-10 16:50:24 +11001465out_free_iclog:
1466 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
1467 prev_iclog = iclog->ic_next;
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001468 kmem_free(iclog->ic_data);
Dave Chinner644c3562008-11-10 16:50:24 +11001469 kmem_free(iclog);
1470 }
Dave Chinner644c3562008-11-10 16:50:24 +11001471out_free_log:
1472 kmem_free(log);
Dave Chinnera6cb7672009-04-06 18:39:27 +02001473out:
Dave Chinner24513372014-06-25 14:58:08 +10001474 return ERR_PTR(error);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475} /* xlog_alloc_log */
1476
1477
1478/*
1479 * Write out the commit record of a transaction associated with the given
1480 * ticket. Return the lsn of the commit record.
1481 */
1482STATIC int
Dave Chinner55b66332010-03-23 11:43:17 +11001483xlog_commit_record(
Mark Tinguelyad223e62012-06-14 09:22:15 -05001484 struct xlog *log,
Dave Chinner55b66332010-03-23 11:43:17 +11001485 struct xlog_ticket *ticket,
1486 struct xlog_in_core **iclog,
1487 xfs_lsn_t *commitlsnp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488{
Dave Chinner55b66332010-03-23 11:43:17 +11001489 struct xfs_mount *mp = log->l_mp;
1490 int error;
1491 struct xfs_log_iovec reg = {
1492 .i_addr = NULL,
1493 .i_len = 0,
1494 .i_type = XLOG_REG_TYPE_COMMIT,
1495 };
1496 struct xfs_log_vec vec = {
1497 .lv_niovecs = 1,
1498 .lv_iovecp = &reg,
1499 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500
1501 ASSERT_ALWAYS(iclog);
Dave Chinner55b66332010-03-23 11:43:17 +11001502 error = xlog_write(log, &vec, ticket, commitlsnp, iclog,
1503 XLOG_COMMIT_TRANS);
1504 if (error)
Nathan Scott7d04a332006-06-09 14:58:38 +10001505 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
Jesper Juhl014c2542006-01-15 02:37:08 +01001506 return error;
Dave Chinner55b66332010-03-23 11:43:17 +11001507}
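/*
 * Note the shape of the vector above: a commit record is a single
 * zero-length region of type XLOG_REG_TYPE_COMMIT, so only an op header
 * ends up in the iclog; xlog_write() sees XLOG_COMMIT_TRANS and hands the
 * iclog back through the commit_iclog pointer rather than releasing it.
 */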
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508
1509/*
1510 * Push on the buffer cache code if we ever use more than 75% of the on-disk
1511 * log space. This code pushes on the lsn which would supposedly free up
1512 * the 25% which we want to leave free. We may need to adopt a policy which
1513 * pushes on an lsn which is further along in the log once we reach the high
1514 * water mark. In this manner, we would be creating a low water mark.
1515 */
David Chinnera8272ce2007-11-23 16:28:09 +11001516STATIC void
Dave Chinner2ced19c2010-12-21 12:09:20 +11001517xlog_grant_push_ail(
Mark Tinguelyad223e62012-06-14 09:22:15 -05001518 struct xlog *log,
Dave Chinner2ced19c2010-12-21 12:09:20 +11001519 int need_bytes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520{
Dave Chinner2ced19c2010-12-21 12:09:20 +11001521 xfs_lsn_t threshold_lsn = 0;
Dave Chinner84f3c682010-12-03 22:11:29 +11001522 xfs_lsn_t last_sync_lsn;
Dave Chinner2ced19c2010-12-21 12:09:20 +11001523 int free_blocks;
1524 int free_bytes;
1525 int threshold_block;
1526 int threshold_cycle;
1527 int free_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528
Dave Chinner2ced19c2010-12-21 12:09:20 +11001529 ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530
Christoph Hellwig28496962012-02-20 02:31:25 +00001531 free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
Dave Chinner2ced19c2010-12-21 12:09:20 +11001532 free_blocks = BTOBBT(free_bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001533
Dave Chinner2ced19c2010-12-21 12:09:20 +11001534 /*
1535 * Set the threshold for the minimum number of free blocks in the
1536 * log to the maximum of what the caller needs, one quarter of the
1537 * log, and 256 blocks.
1538 */
1539 free_threshold = BTOBB(need_bytes);
Dave Chinner9bb54cb2018-06-07 07:54:02 -07001540 free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
1541 free_threshold = max(free_threshold, 256);
Dave Chinner2ced19c2010-12-21 12:09:20 +11001542 if (free_blocks >= free_threshold)
1543 return;
1544
Dave Chinner1c3cb9e2010-12-21 12:28:39 +11001545 xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
1546 &threshold_block);
1547 threshold_block += free_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 if (threshold_block >= log->l_logBBsize) {
Dave Chinner2ced19c2010-12-21 12:09:20 +11001549 threshold_block -= log->l_logBBsize;
1550 threshold_cycle += 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 }
Dave Chinner2ced19c2010-12-21 12:09:20 +11001552 threshold_lsn = xlog_assign_lsn(threshold_cycle,
1553 threshold_block);
1554 /*
1555 * Don't pass in an lsn greater than the lsn of the last
Dave Chinner84f3c682010-12-03 22:11:29 +11001556 * log record known to be on disk. Use a snapshot of the last sync lsn
1557 * so that it doesn't change between the compare and the set.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 */
Dave Chinner84f3c682010-12-03 22:11:29 +11001559 last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
1560 if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
1561 threshold_lsn = last_sync_lsn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562
Dave Chinner2ced19c2010-12-21 12:09:20 +11001563 /*
1564 * Get the transaction layer to kick the dirty buffers out to
1565 * disk asynchronously. No point in trying to do this if
1566 * the filesystem is shutting down.
1567 */
1568 if (!XLOG_FORCED_SHUTDOWN(log))
Dave Chinnerfd074842011-04-08 12:45:07 +10001569 xfs_ail_push(log->l_ailp, threshold_lsn);
Dave Chinner2ced19c2010-12-21 12:09:20 +11001570}
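/*
 * Worked example (illustrative numbers): for a 2000-block log with 300
 * free blocks and a small need_bytes, free_threshold becomes
 * max(BTOBB(need_bytes), 2000 >> 2, 256) = 500 blocks. Since 300 < 500,
 * the threshold LSN is placed 500 blocks past the current tail (wrapping
 * the block number and bumping the cycle if it runs off the end of the
 * log) and the AIL is pushed up to that LSN.
 */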
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571
Christoph Hellwig873ff5502010-01-13 22:17:57 +00001572/*
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001573 * Stamp cycle number in every block
1574 */
1575STATIC void
1576xlog_pack_data(
1577 struct xlog *log,
1578 struct xlog_in_core *iclog,
1579 int roundoff)
1580{
1581 int i, j, k;
1582 int size = iclog->ic_offset + roundoff;
1583 __be32 cycle_lsn;
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10001584 char *dp;
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001585
1586 cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
1587
1588 dp = iclog->ic_datap;
1589 for (i = 0; i < BTOBB(size); i++) {
1590 if (i >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE))
1591 break;
1592 iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
1593 *(__be32 *)dp = cycle_lsn;
1594 dp += BBSIZE;
1595 }
1596
1597 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
1598 xlog_in_core_2_t *xhdr = iclog->ic_data;
1599
1600 for ( ; i < BTOBB(size); i++) {
1601 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
1602 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
1603 xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
1604 *(__be32 *)dp = cycle_lsn;
1605 dp += BBSIZE;
1606 }
1607
1608 for (i = 1; i < log->l_iclog_heads; i++)
1609 xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
1610 }
1611}
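/*
 * Worked example of the stamping above (illustrative geometry): a 64k
 * iclog contains 128 basic blocks of 512 bytes. The first 64 saved words
 * (32k / BBSIZE) land in h_cycle_data[] of the record header itself; on a
 * v2 log the remaining 64 go to the extended headers, block i mapping to
 * xhdr[i / 64].hic_xheader.xh_cycle_data[i % 64].
 */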
1612
1613/*
1614 * Calculate the checksum for a log buffer.
1615 *
1616 * This is a little more complicated than it should be because the various
1617 * headers and the actual data are non-contiguous.
1618 */
Dave Chinnerf9668a02012-11-28 13:01:03 +11001619__le32
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001620xlog_cksum(
1621 struct xlog *log,
1622 struct xlog_rec_header *rhead,
1623 char *dp,
1624 int size)
1625{
Darrick J. Wongc8ce5402017-06-16 11:00:05 -07001626 uint32_t crc;
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001627
1628 /* first generate the crc for the record header ... */
Dave Chinnercae028d2016-12-05 14:40:32 +11001629 crc = xfs_start_cksum_update((char *)rhead,
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001630 sizeof(struct xlog_rec_header),
1631 offsetof(struct xlog_rec_header, h_crc));
1632
1633 /* ... then for additional cycle data for v2 logs ... */
1634 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
1635 union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead;
1636 int i;
Brian Fostera3f20012015-08-19 09:59:50 +10001637 int xheads;
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001638
Brian Fostera3f20012015-08-19 09:59:50 +10001639 xheads = size / XLOG_HEADER_CYCLE_SIZE;
1640 if (size % XLOG_HEADER_CYCLE_SIZE)
1641 xheads++;
1642
1643 for (i = 1; i < xheads; i++) {
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001644 crc = crc32c(crc, &xhdr[i].hic_xheader,
1645 sizeof(struct xlog_rec_ext_header));
1646 }
1647 }
1648
1649 /* ... and finally for the payload */
1650 crc = crc32c(crc, dp, size);
1651
1652 return xfs_end_cksum(crc);
1653}
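/*
 * A minimal verification sketch (hypothetical helper, not part of this
 * file; log recovery performs the equivalent check): recompute the CRC
 * over a record and compare it with the stored value. h_crc itself is
 * excluded from the calculation by xfs_start_cksum_update() above, so
 * the comparison is well defined.
 */
static inline bool
xlog_cksum_matches(
	struct xlog		*log,
	struct xlog_rec_header	*rhead,
	char			*dp,
	int			size)
{
	return rhead->h_crc == xlog_cksum(log, rhead, dp, size);
}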
1654
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001655static void
1656xlog_bio_end_io(
1657 struct bio *bio)
1658{
1659 struct xlog_in_core *iclog = bio->bi_private;
1660
Christoph Hellwig1058d0f2019-06-28 19:27:25 -07001661 queue_work(iclog->ic_log->l_ioend_workqueue,
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001662 &iclog->ic_end_io_work);
1663}
1664
1665static void
1666xlog_map_iclog_data(
1667 struct bio *bio,
1668 void *data,
1669 size_t count)
1670{
1671 do {
1672 struct page *page = kmem_to_page(data);
1673 unsigned int off = offset_in_page(data);
1674 size_t len = min_t(size_t, count, PAGE_SIZE - off);
1675
1676 WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len);
1677
1678 data += len;
1679 count -= len;
1680 } while (count);
1681}
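/*
 * The loop above exists because iclog data buffers may be vmalloc()ed:
 * virtually contiguous data can span physically discontiguous pages, so
 * each iteration resolves one page with kmem_to_page() and adds at most
 * PAGE_SIZE - off bytes of it to the bio.
 */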
1682
Christoph Hellwig94860a32019-06-28 19:27:22 -07001683STATIC void
1684xlog_write_iclog(
1685 struct xlog *log,
1686 struct xlog_in_core *iclog,
Christoph Hellwig94860a32019-06-28 19:27:22 -07001687 uint64_t bno,
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001688 unsigned int count,
Christoph Hellwig94860a32019-06-28 19:27:22 -07001689 bool need_flush)
Christoph Hellwig873ff5502010-01-13 22:17:57 +00001690{
Christoph Hellwig94860a32019-06-28 19:27:22 -07001691 ASSERT(bno < log->l_logBBsize);
Christoph Hellwig94860a32019-06-28 19:27:22 -07001692
1693 /*
1694 * We lock the iclogbufs here so that we can serialise against I/O
1695 * completion during unmount. We might be processing a shutdown
1696 * triggered during unmount, and that can occur asynchronously to the
1697	 * unmount thread, and hence we need to ensure that it completes before
1698	 * tearing down the iclogbufs. Hence we need to hold the buffer lock
1699	 * across the log IO to achieve that.
1700 */
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001701 down(&iclog->ic_sema);
Christoph Hellwig94860a32019-06-28 19:27:22 -07001702 if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) {
Christoph Hellwig873ff5502010-01-13 22:17:57 +00001703 /*
1704 * It would seem logical to return EIO here, but we rely on
1705 * the log state machine to propagate I/O errors instead of
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001706	 * doing it here. We kick off the state machine and unlock
1707	 * the buffer manually; the code needs to be kept in sync
1708 * with the I/O completion path.
Christoph Hellwig873ff5502010-01-13 22:17:57 +00001709 */
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001710		xlog_state_done_syncing(iclog, true);
1711 up(&iclog->ic_sema);
Christoph Hellwig94860a32019-06-28 19:27:22 -07001712 return;
Christoph Hellwig873ff5502010-01-13 22:17:57 +00001713 }
1714
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001715 iclog->ic_io_size = count;
1716
1717 bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
1718 bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
1719 iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
1720 iclog->ic_bio.bi_end_io = xlog_bio_end_io;
1721 iclog->ic_bio.bi_private = iclog;
1722 iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA;
1723 if (need_flush)
1724 iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
1725
1726 xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size);
1727 if (is_vmalloc_addr(iclog->ic_data))
1728 flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size);
1729
1730 /*
1731 * If this log buffer would straddle the end of the log we will have
1732 * to split it up into two bios, so that we can continue at the start.
1733 */
1734 if (bno + BTOBB(count) > log->l_logBBsize) {
1735 struct bio *split;
1736
1737 split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno,
1738 GFP_NOIO, &fs_bio_set);
1739 bio_chain(split, &iclog->ic_bio);
1740 submit_bio(split);
1741
1742 /* restart at logical offset zero for the remainder */
1743 iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart;
1744 }
1745
1746 submit_bio(&iclog->ic_bio);
Christoph Hellwig873ff5502010-01-13 22:17:57 +00001747}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748
1749/*
Christoph Hellwig56933842019-06-28 19:27:22 -07001750 * We need to bump cycle number for the part of the iclog that is
1751 * written to the start of the log. Watch out for the header magic
1752 * number case, though.
1753 */
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001754static void
Christoph Hellwig56933842019-06-28 19:27:22 -07001755xlog_split_iclog(
1756 struct xlog *log,
1757 void *data,
1758 uint64_t bno,
1759 unsigned int count)
1760{
1761 unsigned int split_offset = BBTOB(log->l_logBBsize - bno);
1762 unsigned int i;
1763
1764 for (i = split_offset; i < count; i += BBSIZE) {
1765 uint32_t cycle = get_unaligned_be32(data + i);
1766
1767 if (++cycle == XLOG_HEADER_MAGIC_NUM)
1768 cycle++;
1769 put_unaligned_be32(cycle, data + i);
1770 }
Christoph Hellwig56933842019-06-28 19:27:22 -07001771}
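/*
 * Worked example (illustrative): if the blocks that wrap to the front of
 * the log were stamped with cycle 4, each is rewritten here with cycle 5;
 * should the incremented value collide with XLOG_HEADER_MAGIC_NUM it is
 * bumped once more, so a data block can never masquerade as a log record
 * header during recovery.
 */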
1772
Christoph Hellwigdb0a6fa2019-06-28 19:27:23 -07001773static int
1774xlog_calc_iclog_size(
1775 struct xlog *log,
1776 struct xlog_in_core *iclog,
1777 uint32_t *roundoff)
1778{
1779 uint32_t count_init, count;
1780 bool use_lsunit;
1781
1782 use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
1783 log->l_mp->m_sb.sb_logsunit > 1;
1784
1785 /* Add for LR header */
1786 count_init = log->l_iclog_hsize + iclog->ic_offset;
1787
1788 /* Round out the log write size */
1789 if (use_lsunit) {
1790 /* we have a v2 stripe unit to use */
1791 count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
1792 } else {
1793 count = BBTOB(BTOBB(count_init));
1794 }
1795
1796 ASSERT(count >= count_init);
1797 *roundoff = count - count_init;
1798
1799 if (use_lsunit)
1800 ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit);
1801 else
1802 ASSERT(*roundoff < BBTOB(1));
1803 return count;
1804}
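/*
 * Worked example (assumed geometry): on a v2 log with a 64k stripe unit,
 * a count_init of 40k (log record header space plus iclog data) rounds up
 * to a 64k write, leaving *roundoff = 24k. Without a stripe unit the
 * write is only rounded up to the next 512-byte basic block boundary.
 */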
1805
Christoph Hellwig56933842019-06-28 19:27:22 -07001806/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
1808 * fashion. Prior to this call, the current iclog ptr in the log should
1809 * have been moved to point to the next available iclog. This allows further
1810 * writes to continue while this code syncs out an iclog ready to go.
1811 * Before an in-core log can be written out, the data section must be scanned
1812 * to save away the 1st word of each BBSIZE block into the header. We replace
1813 * it with the current cycle count. Each BBSIZE block is tagged with the
1814 * cycle count because there is an implicit assumption that drives will
1815 * guarantee that entire 512 byte blocks get written at once. In other words,
1816 * we can't have part of a 512 byte block written and part not written. By
1817 * tagging each block, we will know which blocks are valid when recovering
1818 * after an unclean shutdown.
1819 *
1820 * This routine is single threaded on the iclog. No other thread can be in
1821 * this routine with the same iclog. Changing contents of iclog can
1822 * therefore be done without grabbing the state machine lock. Updating the global
1823 * log will require grabbing the lock though.
1824 *
1825 * The entire log manager uses a logical block numbering scheme. Only
Christoph Hellwig94860a32019-06-28 19:27:22 -07001826 * xlog_write_iclog knows about the fact that the log may not start with
1827 * block zero on a given device.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 */
Christoph Hellwig94860a32019-06-28 19:27:22 -07001829STATIC void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001830xlog_sync(
1831 struct xlog *log,
1832 struct xlog_in_core *iclog)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833{
Christoph Hellwigdb0a6fa2019-06-28 19:27:23 -07001834 unsigned int count; /* byte count of bwrite */
1835 unsigned int roundoff; /* roundoff to BB or stripe */
1836 uint64_t bno;
Christoph Hellwigdb0a6fa2019-06-28 19:27:23 -07001837 unsigned int size;
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001838 bool need_flush = true, split = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839
David Chinner155cc6b2008-03-06 13:44:14 +11001840 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841
Christoph Hellwigdb0a6fa2019-06-28 19:27:23 -07001842 count = xlog_calc_iclog_size(log, iclog, &roundoff);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843
1844 /* move grant heads by roundoff in sync */
Christoph Hellwig28496962012-02-20 02:31:25 +00001845 xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
1846 xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847
1848 /* put cycle number in every block */
1849 xlog_pack_data(log, iclog, roundoff);
1850
1851 /* real byte length */
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001852 size = iclog->ic_offset;
Christoph Hellwigdb0a6fa2019-06-28 19:27:23 -07001853 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb))
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001854 size += roundoff;
1855 iclog->ic_header.h_len = cpu_to_be32(size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001856
Christoph Hellwig9b0489c2019-06-28 19:27:23 -07001857 XFS_STATS_INC(log->l_mp, xs_log_writes);
Bill O'Donnellff6d6af2015-10-12 18:21:22 +11001858 XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859
Christoph Hellwig94860a32019-06-28 19:27:22 -07001860 bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));
1861
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862 /* Do we need to split this write into 2 parts? */
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001863 if (bno + BTOBB(count) > log->l_logBBsize) {
1864 xlog_split_iclog(log, &iclog->ic_header, bno, count);
1865 split = true;
1866 }
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001867
1868 /* calculate the checksum */
1869 iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
1870 iclog->ic_datap, size);
Brian Foster609adfc2016-01-05 07:41:16 +11001871 /*
1872 * Intentionally corrupt the log record CRC based on the error injection
1873 * frequency, if defined. This facilitates testing log recovery in the
1874 * event of torn writes. Hence, set the IOABORT state to abort the log
1875 * write on I/O completion and shutdown the fs. The subsequent mount
1876 * detects the bad CRC and attempts to recover.
1877 */
Christoph Hellwig366fc4b2019-06-28 19:27:21 -07001878#ifdef DEBUG
Brian Foster3e88a002017-06-27 09:52:32 -07001879 if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
Christoph Hellwige2a64192017-04-21 11:24:40 -07001880 iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
Christoph Hellwig366fc4b2019-06-28 19:27:21 -07001881 iclog->ic_fail_crc = true;
Brian Foster609adfc2016-01-05 07:41:16 +11001882 xfs_warn(log->l_mp,
1883 "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
1884 be64_to_cpu(iclog->ic_header.h_lsn));
1885 }
Christoph Hellwig366fc4b2019-06-28 19:27:21 -07001886#endif
Christoph Hellwig0e446be2012-11-12 22:54:24 +11001887
Dave Chinner2291dab2016-12-09 16:49:54 +11001888 /*
1889 * Flush the data device before flushing the log to make sure all meta
1890 * data written back from the AIL actually made it to disk before
1891 * stamping the new log tail LSN into the log buffer. For an external
1892 * log we need to issue the flush explicitly, and unfortunately
1893 * synchronously here; for an internal log we can simply use the block
1894 * layer state machine for preflushes.
1895 */
Christoph Hellwig2d15d2c2019-06-28 19:27:24 -07001896 if (log->l_targ != log->l_mp->m_ddev_targp || split) {
Dave Chinner2291dab2016-12-09 16:49:54 +11001897 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
Christoph Hellwig94860a32019-06-28 19:27:22 -07001898 need_flush = false;
1899 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900
Christoph Hellwigabca1f32019-06-28 19:27:24 -07001901 xlog_verify_iclog(log, iclog, count);
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001902 xlog_write_iclog(log, iclog, bno, count, need_flush);
Christoph Hellwig94860a32019-06-28 19:27:22 -07001903}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905/*
Nathan Scottc41564b2006-03-29 08:55:14 +10001906 * Deallocate a log structure
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907 */
David Chinnera8272ce2007-11-23 16:28:09 +11001908STATIC void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001909xlog_dealloc_log(
1910 struct xlog *log)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911{
1912 xlog_in_core_t *iclog, *next_iclog;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 int i;
1914
Dave Chinner71e330b2010-05-21 14:37:18 +10001915 xlog_cil_destroy(log);
1916
Dave Chinner44396472011-04-21 09:34:27 +00001917 /*
Dave Chinner9c23ecc2014-04-17 08:15:26 +10001918 * Cycle all the iclogbuf locks to make sure all log IO completion
1919 * is done before we tear down these buffers.
Dave Chinner44396472011-04-21 09:34:27 +00001920 */
Dave Chinner9c23ecc2014-04-17 08:15:26 +10001921 iclog = log->l_iclog;
1922 for (i = 0; i < log->l_iclog_bufs; i++) {
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001923 down(&iclog->ic_sema);
1924 up(&iclog->ic_sema);
Dave Chinner9c23ecc2014-04-17 08:15:26 +10001925 iclog = iclog->ic_next;
1926 }
1927
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928 iclog = log->l_iclog;
Dave Chinner9c23ecc2014-04-17 08:15:26 +10001929 for (i = 0; i < log->l_iclog_bufs; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930 next_iclog = iclog->ic_next;
Christoph Hellwig79b54d92019-06-28 19:27:25 -07001931 kmem_free(iclog->ic_data);
Denys Vlasenkof0e2d932008-05-19 16:31:57 +10001932 kmem_free(iclog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 iclog = next_iclog;
1934 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936 log->l_mp->m_log = NULL;
Christoph Hellwig1058d0f2019-06-28 19:27:25 -07001937 destroy_workqueue(log->l_ioend_workqueue);
Denys Vlasenkof0e2d932008-05-19 16:31:57 +10001938 kmem_free(log);
Nathan Scottc41564b2006-03-29 08:55:14 +10001939} /* xlog_dealloc_log */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940
1941/*
1942 * Update counters atomically now that memcpy is done.
1943 */
1944/* ARGSUSED */
1945static inline void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001946xlog_state_finish_copy(
1947 struct xlog *log,
1948 struct xlog_in_core *iclog,
1949 int record_cnt,
1950 int copy_bytes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951{
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10001952 spin_lock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953
Marcin Slusarz413d57c2008-02-13 15:03:29 -08001954 be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 iclog->ic_offset += copy_bytes;
1956
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10001957 spin_unlock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958} /* xlog_state_finish_copy */
1959
1960
1961
1962
1963/*
Tim Shimmin7e9c6392005-09-02 16:42:05 +10001964 * print out info relating to regions written which consume
1965 * the reservation
1966 */
Dave Chinner71e330b2010-05-21 14:37:18 +10001967void
1968xlog_print_tic_res(
1969 struct xfs_mount *mp,
1970 struct xlog_ticket *ticket)
Tim Shimmin7e9c6392005-09-02 16:42:05 +10001971{
1972 uint i;
1973 uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
1974
1975 /* match with XLOG_REG_TYPE_* in xfs_log.h */
Darrick J. Wong5110cd82016-03-07 08:40:03 +11001976#define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str
Darrick J. Wongd31d7182019-05-23 08:45:21 -07001977 static char *res_type_str[] = {
Darrick J. Wong5110cd82016-03-07 08:40:03 +11001978 REG_TYPE_STR(BFORMAT, "bformat"),
1979 REG_TYPE_STR(BCHUNK, "bchunk"),
1980 REG_TYPE_STR(EFI_FORMAT, "efi_format"),
1981 REG_TYPE_STR(EFD_FORMAT, "efd_format"),
1982 REG_TYPE_STR(IFORMAT, "iformat"),
1983 REG_TYPE_STR(ICORE, "icore"),
1984 REG_TYPE_STR(IEXT, "iext"),
1985 REG_TYPE_STR(IBROOT, "ibroot"),
1986 REG_TYPE_STR(ILOCAL, "ilocal"),
1987 REG_TYPE_STR(IATTR_EXT, "iattr_ext"),
1988 REG_TYPE_STR(IATTR_BROOT, "iattr_broot"),
1989 REG_TYPE_STR(IATTR_LOCAL, "iattr_local"),
1990 REG_TYPE_STR(QFORMAT, "qformat"),
1991 REG_TYPE_STR(DQUOT, "dquot"),
1992 REG_TYPE_STR(QUOTAOFF, "quotaoff"),
1993 REG_TYPE_STR(LRHEADER, "LR header"),
1994 REG_TYPE_STR(UNMOUNT, "unmount"),
1995 REG_TYPE_STR(COMMIT, "commit"),
1996 REG_TYPE_STR(TRANSHDR, "trans header"),
Darrick J. Wongd31d7182019-05-23 08:45:21 -07001997 REG_TYPE_STR(ICREATE, "inode create"),
1998 REG_TYPE_STR(RUI_FORMAT, "rui_format"),
1999 REG_TYPE_STR(RUD_FORMAT, "rud_format"),
2000 REG_TYPE_STR(CUI_FORMAT, "cui_format"),
2001 REG_TYPE_STR(CUD_FORMAT, "cud_format"),
2002 REG_TYPE_STR(BUI_FORMAT, "bui_format"),
2003 REG_TYPE_STR(BUD_FORMAT, "bud_format"),
Tim Shimmin7e9c6392005-09-02 16:42:05 +10002004 };
Darrick J. Wongd31d7182019-05-23 08:45:21 -07002005 BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1);
Darrick J. Wong5110cd82016-03-07 08:40:03 +11002006#undef REG_TYPE_STR
Tim Shimmin7e9c6392005-09-02 16:42:05 +10002007
Brian Foster7d2d56532017-06-14 21:29:48 -07002008 xfs_warn(mp, "ticket reservation summary:");
Joe Perchesf41febd2015-07-29 11:52:04 +10002009 xfs_warn(mp, " unit res = %d bytes",
2010 ticket->t_unit_res);
2011 xfs_warn(mp, " current res = %d bytes",
2012 ticket->t_curr_res);
2013 xfs_warn(mp, " total reg = %u bytes (o/flow = %u bytes)",
2014 ticket->t_res_arr_sum, ticket->t_res_o_flow);
2015 xfs_warn(mp, " ophdrs = %u (ophdr space = %u bytes)",
2016 ticket->t_res_num_ophdrs, ophdr_spc);
2017 xfs_warn(mp, " ophdr + reg = %u bytes",
2018 ticket->t_res_arr_sum + ticket->t_res_o_flow + ophdr_spc);
2019 xfs_warn(mp, " num regions = %u",
2020 ticket->t_res_num);
Tim Shimmin7e9c6392005-09-02 16:42:05 +10002021
2022 for (i = 0; i < ticket->t_res_num; i++) {
Dave Chinnera0fa2b62011-03-07 10:01:35 +11002023 uint r_type = ticket->t_res_arr[i].r_type;
Eric Sandeen08e96e12013-10-11 20:59:05 -05002024 xfs_warn(mp, "region[%u]: %s - %u bytes", i,
Tim Shimmin7e9c6392005-09-02 16:42:05 +10002025 ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
Darrick J. Wong5110cd82016-03-07 08:40:03 +11002026 "bad-rtype" : res_type_str[r_type]),
Tim Shimmin7e9c6392005-09-02 16:42:05 +10002027 ticket->t_res_arr[i].r_len);
2028 }
2029}
Tim Shimmin7e9c6392005-09-02 16:42:05 +10002030
2031/*
Brian Fosterd4ca1d52017-06-14 21:29:50 -07002032 * Print a summary of the transaction.
2033 */
2034void
2035xlog_print_trans(
Dave Chinnere6631f82018-05-09 07:49:37 -07002036 struct xfs_trans *tp)
Brian Fosterd4ca1d52017-06-14 21:29:50 -07002037{
Dave Chinnere6631f82018-05-09 07:49:37 -07002038 struct xfs_mount *mp = tp->t_mountp;
2039 struct xfs_log_item *lip;
Brian Fosterd4ca1d52017-06-14 21:29:50 -07002040
2041 /* dump core transaction and ticket info */
2042 xfs_warn(mp, "transaction summary:");
Brian Foster2c8f6262018-01-08 10:41:35 -08002043 xfs_warn(mp, " log res = %d", tp->t_log_res);
2044 xfs_warn(mp, " log count = %d", tp->t_log_count);
2045 xfs_warn(mp, " flags = 0x%x", tp->t_flags);
Brian Fosterd4ca1d52017-06-14 21:29:50 -07002046
2047 xlog_print_tic_res(mp, tp->t_ticket);
2048
2049 /* dump each log item */
Dave Chinnere6631f82018-05-09 07:49:37 -07002050 list_for_each_entry(lip, &tp->t_items, li_trans) {
Brian Fosterd4ca1d52017-06-14 21:29:50 -07002051 struct xfs_log_vec *lv = lip->li_lv;
2052 struct xfs_log_iovec *vec;
2053 int i;
2054
2055 xfs_warn(mp, "log item: ");
2056 xfs_warn(mp, " type = 0x%x", lip->li_type);
Dave Chinner22525c12018-05-09 07:47:34 -07002057 xfs_warn(mp, " flags = 0x%lx", lip->li_flags);
Brian Fosterd4ca1d52017-06-14 21:29:50 -07002058 if (!lv)
2059 continue;
2060 xfs_warn(mp, " niovecs = %d", lv->lv_niovecs);
2061 xfs_warn(mp, " size = %d", lv->lv_size);
2062 xfs_warn(mp, " bytes = %d", lv->lv_bytes);
2063 xfs_warn(mp, " buf len = %d", lv->lv_buf_len);
2064
2065 /* dump each iovec for the log item */
2066 vec = lv->lv_iovecp;
2067 for (i = 0; i < lv->lv_niovecs; i++) {
2068 int dumplen = min(vec->i_len, 32);
2069
2070 xfs_warn(mp, " iovec[%d]", i);
2071 xfs_warn(mp, " type = 0x%x", vec->i_type);
2072 xfs_warn(mp, " len = %d", vec->i_len);
2073 xfs_warn(mp, " first %d bytes of iovec[%d]:", dumplen, i);
kbuild test robot244e3de2017-06-26 08:54:16 -07002074 xfs_hex_dump(vec->i_addr, dumplen);
Brian Fosterd4ca1d52017-06-14 21:29:50 -07002075
2076 vec++;
2077 }
2078 }
2079}
2080
2081/*
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002082 * Calculate the potential space needed by the log vector. Each region gets
2083 * its own xlog_op_header_t and may need to be double word aligned.
2084 */
2085static int
2086xlog_write_calc_vec_length(
2087 struct xlog_ticket *ticket,
Dave Chinner55b66332010-03-23 11:43:17 +11002088 struct xfs_log_vec *log_vector)
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002089{
Dave Chinner55b66332010-03-23 11:43:17 +11002090 struct xfs_log_vec *lv;
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002091 int headers = 0;
2092 int len = 0;
2093 int i;
2094
2095 /* acct for start rec of xact */
2096 if (ticket->t_flags & XLOG_TIC_INITED)
2097 headers++;
2098
Dave Chinner55b66332010-03-23 11:43:17 +11002099 for (lv = log_vector; lv; lv = lv->lv_next) {
Dave Chinnerfd638752013-06-27 16:04:51 +10002100 /* we don't write ordered log vectors */
2101 if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED)
2102 continue;
2103
Dave Chinner55b66332010-03-23 11:43:17 +11002104 headers += lv->lv_niovecs;
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002105
Dave Chinner55b66332010-03-23 11:43:17 +11002106 for (i = 0; i < lv->lv_niovecs; i++) {
2107 struct xfs_log_iovec *vecp = &lv->lv_iovecp[i];
2108
2109 len += vecp->i_len;
2110 xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
2111 }
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002112 }
2113
2114 ticket->t_res_num_ophdrs += headers;
2115 len += headers * sizeof(struct xlog_op_header);
2116
2117 return len;
2118}
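/*
 * Worked example (illustrative): a freshly reserved ticket (XLOG_TIC_INITED
 * still set) writing two log vectors of 3 and 2 regions respectively needs
 * 1 + 3 + 2 = 6 op headers, so the returned length is the sum of the
 * region payloads plus 6 * sizeof(struct xlog_op_header).
 */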
2119
2120/*
2121 * If first write for transaction, insert start record. We can't be trying to
2122 * commit if we are inited. We can't have any "partial_copy" if we are inited.
2123 */
2124static int
2125xlog_write_start_rec(
Christoph Hellwige6b1f272010-03-23 11:47:38 +11002126 struct xlog_op_header *ophdr,
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002127 struct xlog_ticket *ticket)
2128{
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002129 if (!(ticket->t_flags & XLOG_TIC_INITED))
2130 return 0;
2131
2132 ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
2133 ophdr->oh_clientid = ticket->t_clientid;
2134 ophdr->oh_len = 0;
2135 ophdr->oh_flags = XLOG_START_TRANS;
2136 ophdr->oh_res2 = 0;
2137
2138 ticket->t_flags &= ~XLOG_TIC_INITED;
2139
2140 return sizeof(struct xlog_op_header);
2141}
2142
2143static xlog_op_header_t *
2144xlog_write_setup_ophdr(
Mark Tinguelyad223e62012-06-14 09:22:15 -05002145 struct xlog *log,
Christoph Hellwige6b1f272010-03-23 11:47:38 +11002146 struct xlog_op_header *ophdr,
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002147 struct xlog_ticket *ticket,
2148 uint flags)
2149{
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002150 ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
2151 ophdr->oh_clientid = ticket->t_clientid;
2152 ophdr->oh_res2 = 0;
2153
2154 /* are we copying a commit or unmount record? */
2155 ophdr->oh_flags = flags;
2156
2157 /*
2158 * We've seen logs corrupted with bad transaction client ids. This
2159 * makes sure that XFS doesn't generate them. Turn this into an EIO
2160 * and shut down the filesystem.
2161 */
2162 switch (ophdr->oh_clientid) {
2163 case XFS_TRANSACTION:
2164 case XFS_VOLUME:
2165 case XFS_LOG:
2166 break;
2167 default:
Dave Chinnera0fa2b62011-03-07 10:01:35 +11002168 xfs_warn(log->l_mp,
Darrick J. Wongc9690042018-01-09 12:02:55 -08002169 "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT,
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002170 ophdr->oh_clientid, ticket);
2171 return NULL;
2172 }
2173
2174 return ophdr;
2175}
2176
2177/*
2178 * Set up the parameters of the region copy into the log. This has
2179 * to handle region write split across multiple log buffers - this
2180 * state is kept external to this function so that this code can
Zhi Yong Wuac0e3002013-08-07 10:11:02 +00002181 * be written in an obvious, self-documenting manner.
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002182 */
2183static int
2184xlog_write_setup_copy(
2185 struct xlog_ticket *ticket,
2186 struct xlog_op_header *ophdr,
2187 int space_available,
2188 int space_required,
2189 int *copy_off,
2190 int *copy_len,
2191 int *last_was_partial_copy,
2192 int *bytes_consumed)
2193{
2194 int still_to_copy;
2195
2196 still_to_copy = space_required - *bytes_consumed;
2197 *copy_off = *bytes_consumed;
2198
2199 if (still_to_copy <= space_available) {
2200 /* write of region completes here */
2201 *copy_len = still_to_copy;
2202 ophdr->oh_len = cpu_to_be32(*copy_len);
2203 if (*last_was_partial_copy)
2204 ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
2205 *last_was_partial_copy = 0;
2206 *bytes_consumed = 0;
2207 return 0;
2208 }
2209
2210 /* partial write of region, needs extra log op header reservation */
2211 *copy_len = space_available;
2212 ophdr->oh_len = cpu_to_be32(*copy_len);
2213 ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
2214 if (*last_was_partial_copy)
2215 ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
2216 *bytes_consumed += *copy_len;
2217 (*last_was_partial_copy)++;
2218
2219 /* account for new log op header */
2220 ticket->t_curr_res -= sizeof(struct xlog_op_header);
2221 ticket->t_res_num_ophdrs++;
2222
2223 return sizeof(struct xlog_op_header);
2224}
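/*
 * Worked example (illustrative): copying a 12k region into an iclog with
 * only 5k of space left writes a 5k partial copy flagged
 * XLOG_CONTINUE_TRANS and charges one extra op header to the ticket; the
 * next call, with 7k still to copy and enough space available, completes
 * the region and flags its op header XLOG_WAS_CONT_TRANS | XLOG_END_TRANS.
 */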
2225
2226static int
2227xlog_write_copy_finish(
Mark Tinguelyad223e62012-06-14 09:22:15 -05002228 struct xlog *log,
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002229 struct xlog_in_core *iclog,
2230 uint flags,
2231 int *record_cnt,
2232 int *data_cnt,
2233 int *partial_copy,
2234 int *partial_copy_len,
2235 int log_offset,
2236 struct xlog_in_core **commit_iclog)
2237{
2238 if (*partial_copy) {
2239 /*
2240 * This iclog has already been marked WANT_SYNC by
2241 * xlog_state_get_iclog_space.
2242 */
2243 xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
2244 *record_cnt = 0;
2245 *data_cnt = 0;
2246 return xlog_state_release_iclog(log, iclog);
2247 }
2248
2249 *partial_copy = 0;
2250 *partial_copy_len = 0;
2251
2252 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
2253 /* no more space in this iclog - push it. */
2254 xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
2255 *record_cnt = 0;
2256 *data_cnt = 0;
2257
2258 spin_lock(&log->l_icloglock);
2259 xlog_state_want_sync(log, iclog);
2260 spin_unlock(&log->l_icloglock);
2261
2262 if (!commit_iclog)
2263 return xlog_state_release_iclog(log, iclog);
2264 ASSERT(flags & XLOG_COMMIT_TRANS);
2265 *commit_iclog = iclog;
2266 }
2267
2268 return 0;
2269}
2270
2271/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272 * Write some region out to in-core log
2273 *
2274 * This will be called when writing externally provided regions or when
2275 * writing out a commit record for a given transaction.
2276 *
2277 * General algorithm:
2278 * 1. Find total length of this write. This may include adding to the
2279 * lengths passed in.
2280 * 2. Check whether we violate the tickets reservation.
2281 * 3. While writing to this iclog
2282 * A. Reserve as much space in this iclog as we can get
2283 * B. If this is first write, save away start lsn
2284 * C. While writing this region:
2285 * 1. If first write of transaction, write start record
2286 * 2. Write log operation header (header per region)
2287 * 3. Find out if we can fit entire region into this iclog
2288 * 4. Potentially, verify destination memcpy ptr
2289 * 5. Memcpy (partial) region
2290 * 6. If partial copy, release iclog; otherwise, continue
2291 * copying more regions into current iclog
2292 * 4. Mark want sync bit (in simulation mode)
2293 * 5. Release iclog for potential flush to on-disk log.
2294 *
2295 * ERRORS:
2296 * 1. Panic if reservation is overrun. This should never happen since
2297 * reservation amounts are generated internally by the filesystem.
2298 * NOTES:
2299 * 1. Tickets are single threaded data structures.
2300 * 2. The XLOG_END_TRANS & XLOG_CONTINUE_TRANS flags are passed down to the
2301 * syncing routine. When a single log_write region needs to span
2302 * multiple in-core logs, the XLOG_CONTINUE_TRANS bit should be set
2303 * on all log operation writes which don't contain the end of the
2304 * region. The XLOG_END_TRANS bit is used for the in-core log
2305 * operation which contains the end of the continued log_write region.
2306 * 3. When xlog_state_get_iclog_space() grabs the rest of the current iclog,
2307 * we don't really know exactly how much space will be used. As a result,
2308 * we don't update ic_offset until the end when we know exactly how many
2309 * bytes have been written out.
2310 */
Dave Chinner71e330b2010-05-21 14:37:18 +10002311int
Christoph Hellwig35a8a722010-02-15 23:34:54 +00002312xlog_write(
Mark Tinguelyad223e62012-06-14 09:22:15 -05002313 struct xlog *log,
Dave Chinner55b66332010-03-23 11:43:17 +11002314 struct xfs_log_vec *log_vector,
Christoph Hellwig35a8a722010-02-15 23:34:54 +00002315 struct xlog_ticket *ticket,
2316 xfs_lsn_t *start_lsn,
2317 struct xlog_in_core **commit_iclog,
2318 uint flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319{
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002320 struct xlog_in_core *iclog = NULL;
Dave Chinner55b66332010-03-23 11:43:17 +11002321 struct xfs_log_iovec *vecp;
2322 struct xfs_log_vec *lv;
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002323 int len;
2324 int index;
2325 int partial_copy = 0;
2326 int partial_copy_len = 0;
2327 int contwr = 0;
2328 int record_cnt = 0;
2329 int data_cnt = 0;
2330 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002332 *start_lsn = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333
Dave Chinner55b66332010-03-23 11:43:17 +11002334 len = xlog_write_calc_vec_length(ticket, log_vector);
Dave Chinner71e330b2010-05-21 14:37:18 +10002335
Christoph Hellwig93b8a582011-12-06 21:58:07 +00002336 /*
2337 * Region headers and bytes are already accounted for.
2338 * We only need to take into account start records and
2339 * split regions in this function.
2340 */
2341 if (ticket->t_flags & XLOG_TIC_INITED)
2342 ticket->t_curr_res -= sizeof(xlog_op_header_t);
2343
2344 /*
2345 * Commit record headers need to be accounted for. These
2346 * come in as separate writes so are easy to detect.
2347 */
2348 if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
2349 ticket->t_curr_res -= sizeof(xlog_op_header_t);
Dave Chinner71e330b2010-05-21 14:37:18 +10002350
Brian Foster7d2d56532017-06-14 21:29:48 -07002351 if (ticket->t_curr_res < 0) {
2352 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
2353 "ctx ticket reservation ran out. Need to up reservation");
Dave Chinner55b66332010-03-23 11:43:17 +11002354 xlog_print_tic_res(log->l_mp, ticket);
Brian Foster7d2d56532017-06-14 21:29:48 -07002355 xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
2356 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357
Dave Chinner55b66332010-03-23 11:43:17 +11002358 index = 0;
2359 lv = log_vector;
2360 vecp = lv->lv_iovecp;
Dave Chinnerfd638752013-06-27 16:04:51 +10002361 while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
Christoph Hellwige6b1f272010-03-23 11:47:38 +11002362 void *ptr;
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002363 int log_offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002365 error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
2366 &contwr, &log_offset);
2367 if (error)
2368 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002370 ASSERT(log_offset <= iclog->ic_size - 1);
Christoph Hellwige6b1f272010-03-23 11:47:38 +11002371 ptr = iclog->ic_datap + log_offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002373 /* start_lsn is the first lsn written to. That's all we need. */
2374 if (!*start_lsn)
2375 *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002377 /*
2378 * This loop writes out as many regions as can fit in the amount
2379 * of space which was allocated by xlog_state_get_iclog_space().
2380 */
Dave Chinnerfd638752013-06-27 16:04:51 +10002381 while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
2382 struct xfs_log_iovec *reg;
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002383 struct xlog_op_header *ophdr;
2384 int start_rec_copy;
2385 int copy_len;
2386 int copy_off;
Dave Chinnerfd638752013-06-27 16:04:51 +10002387 bool ordered = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388
Dave Chinnerfd638752013-06-27 16:04:51 +10002389 /* ordered log vectors have no regions to write */
2390 if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
2391 ASSERT(lv->lv_niovecs == 0);
2392 ordered = true;
2393 goto next_lv;
2394 }
2395
2396 reg = &vecp[index];
Darrick J. Wongc8ce5402017-06-16 11:00:05 -07002397 ASSERT(reg->i_len % sizeof(int32_t) == 0);
2398 ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002400 start_rec_copy = xlog_write_start_rec(ptr, ticket);
2401 if (start_rec_copy) {
2402 record_cnt++;
Christoph Hellwige6b1f272010-03-23 11:47:38 +11002403 xlog_write_adv_cnt(&ptr, &len, &log_offset,
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002404 start_rec_copy);
2405 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002407 ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
2408 if (!ophdr)
Dave Chinner24513372014-06-25 14:58:08 +10002409 return -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410
Christoph Hellwige6b1f272010-03-23 11:47:38 +11002411 xlog_write_adv_cnt(&ptr, &len, &log_offset,
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002412 sizeof(struct xlog_op_header));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002414 len += xlog_write_setup_copy(ticket, ophdr,
2415 iclog->ic_size-log_offset,
Dave Chinner55b66332010-03-23 11:43:17 +11002416 reg->i_len,
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002417 &copy_off, &copy_len,
2418 &partial_copy,
2419 &partial_copy_len);
2420 xlog_verify_dest_ptr(log, ptr);
Dave Chinnerb5203cd2010-03-23 11:29:44 +11002421
Eric Sandeen91f9f5f2015-10-12 16:04:15 +11002422 /*
2423 * Copy region.
2424 *
2425 * Unmount records just log an opheader, so can have
2426 * empty payloads with no data region to copy. Hence we
2427 * only copy the payload if the vector says it has data
2428 * to copy.
2429 */
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002430 ASSERT(copy_len >= 0);
Eric Sandeen91f9f5f2015-10-12 16:04:15 +11002431 if (copy_len > 0) {
2432 memcpy(ptr, reg->i_addr + copy_off, copy_len);
2433 xlog_write_adv_cnt(&ptr, &len, &log_offset,
2434 copy_len);
2435 }
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002436 copy_len += start_rec_copy + sizeof(xlog_op_header_t);
2437 record_cnt++;
2438 data_cnt += contwr ? copy_len : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002440 error = xlog_write_copy_finish(log, iclog, flags,
2441 &record_cnt, &data_cnt,
2442 &partial_copy,
2443 &partial_copy_len,
2444 log_offset,
2445 commit_iclog);
2446 if (error)
2447 return error;
2448
2449 /*
2450 * if we had a partial copy, we need to get more iclog
2451 * space but we don't want to increment the region
2452 * index because there is still more in this region to
2453 * write.
2454 *
2455 * If we completed writing this region, and we flushed
2456 * the iclog (indicated by resetting of the record
2457 * count), then we also need to get more log space. If
2458 * this was the last record, though, we are done and
2459 * can just return.
2460 */
2461 if (partial_copy)
2462 break;
2463
Dave Chinner55b66332010-03-23 11:43:17 +11002464 if (++index == lv->lv_niovecs) {
Dave Chinnerfd638752013-06-27 16:04:51 +10002465next_lv:
Dave Chinner55b66332010-03-23 11:43:17 +11002466 lv = lv->lv_next;
2467 index = 0;
2468 if (lv)
2469 vecp = lv->lv_iovecp;
2470 }
Thomas Meyer749f24f2017-10-09 11:38:54 -07002471 if (record_cnt == 0 && !ordered) {
Dave Chinner55b66332010-03-23 11:43:17 +11002472 if (!lv)
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002473 return 0;
2474 break;
2475 }
2476 }
2477 }
2478
2479 ASSERT(len == 0);
2480
2481 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
2482 if (!commit_iclog)
2483 return xlog_state_release_iclog(log, iclog);
2484
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485 ASSERT(flags & XLOG_COMMIT_TRANS);
2486 *commit_iclog = iclog;
2487 return 0;
Christoph Hellwig99428ad2010-03-23 11:35:45 +11002488}
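/*
 * Illustrative layout (a sketch, not authoritative): one transaction
 * written by the loop above occupies the iclog data area roughly as
 *
 *	| start-rec ophdr | ophdr + region 1 | ophdr + region 2 | ...
 *
 * where region 1 is normally the transaction header. A region that
 * does not fit is split across iclogs: xlog_write_copy_finish() closes
 * this iclog and the remainder continues under a fresh ophdr in the
 * next one, which is what partial_copy tracks.
 */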
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489
2490
2491/*****************************************************************************
2492 *
2493 * State Machine functions
2494 *
2495 *****************************************************************************
2496 */
2497
2498/* Clean iclogs starting from the head. This ordering must be
2499 * maintained, so an iclog doesn't become ACTIVE beyond one that
2500 * is SYNCING. This is also required to maintain the notion that we use
David Chinner12017fa2008-08-13 16:34:31 +10002501 * an ordered wait queue to hold off would-be writers to the log when every
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502 * iclog is trying to sync to disk.
2503 *
2504 * State Change: DIRTY -> ACTIVE
2505 */
Christoph Hellwigba0f32d2005-06-21 15:36:52 +10002506STATIC void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05002507xlog_state_clean_log(
2508 struct xlog *log)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509{
2510 xlog_in_core_t *iclog;
2511 int changed = 0;
2512
2513 iclog = log->l_iclog;
2514 do {
2515 if (iclog->ic_state == XLOG_STATE_DIRTY) {
2516 iclog->ic_state = XLOG_STATE_ACTIVE;
2517 iclog->ic_offset = 0;
Christoph Hellwig89ae3792019-06-28 19:27:34 -07002518 ASSERT(list_empty_careful(&iclog->ic_callbacks));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519 /*
 2520 * If the number of ops in this iclog indicates it just
 2521 * contains the dummy transaction, we can
 2522 * change state into IDLE (the second time around).
 2523 * Otherwise we should change the state into
 2524 * NEED, since a dummy record is still needed.
 2525 * We don't need to cover the dummy.
2526 */
2527 if (!changed &&
Christoph Hellwigb53e6752007-10-12 10:59:34 +10002528 (be32_to_cpu(iclog->ic_header.h_num_logops) ==
2529 XLOG_COVER_OPS)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530 changed = 1;
2531 } else {
2532 /*
 2533 * We have two dirty iclogs, so start over.
 2534 * This could also mean the number of ops
 2535 * indicates this is not the dummy going out.
2536 */
2537 changed = 2;
2538 }
2539 iclog->ic_header.h_num_logops = 0;
2540 memset(iclog->ic_header.h_cycle_data, 0,
2541 sizeof(iclog->ic_header.h_cycle_data));
2542 iclog->ic_header.h_lsn = 0;
2543 } else if (iclog->ic_state == XLOG_STATE_ACTIVE)
2544 /* do nothing */;
2545 else
2546 break; /* stop cleaning */
2547 iclog = iclog->ic_next;
2548 } while (iclog != log->l_iclog);
2549
2550 /* log is locked when we are called */
2551 /*
 2552 * Change state for the dummy log record.
 2553 * We usually go to NEED. But we go to NEED2 if the changed flag
 2554 * indicates we are done writing the dummy record.
 2555 * If we are done with the second dummy record (DONE2), then
 2556 * we go to IDLE.
2557 */
2558 if (changed) {
2559 switch (log->l_covered_state) {
2560 case XLOG_STATE_COVER_IDLE:
2561 case XLOG_STATE_COVER_NEED:
2562 case XLOG_STATE_COVER_NEED2:
2563 log->l_covered_state = XLOG_STATE_COVER_NEED;
2564 break;
2565
2566 case XLOG_STATE_COVER_DONE:
2567 if (changed == 1)
2568 log->l_covered_state = XLOG_STATE_COVER_NEED2;
2569 else
2570 log->l_covered_state = XLOG_STATE_COVER_NEED;
2571 break;
2572
2573 case XLOG_STATE_COVER_DONE2:
2574 if (changed == 1)
2575 log->l_covered_state = XLOG_STATE_COVER_IDLE;
2576 else
2577 log->l_covered_state = XLOG_STATE_COVER_NEED;
2578 break;
2579
2580 default:
2581 ASSERT(0);
2582 }
2583 }
2584} /* xlog_state_clean_log */
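/*
 * A sketch of the covering transitions driven by the switch above (the
 * NEED -> DONE and NEED2 -> DONE2 steps happen elsewhere, when the
 * dummy records are written):
 *
 *	DONE,  changed == 1  ->  NEED2	(first dummy is on disk)
 *	DONE2, changed == 1  ->  IDLE	(second dummy is on disk)
 *	changed == 2, or IDLE/NEED/NEED2  ->  NEED
 *
 * so two clean dummy-only iclogs in a row walk the covering state
 * machine to IDLE.
 */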
2585
2586STATIC xfs_lsn_t
2587xlog_get_lowest_lsn(
Christoph Hellwig9bff31322019-06-28 19:27:20 -07002588 struct xlog *log)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589{
Christoph Hellwig9bff31322019-06-28 19:27:20 -07002590 struct xlog_in_core *iclog = log->l_iclog;
2591 xfs_lsn_t lowest_lsn = 0, lsn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592
Linus Torvalds1da177e2005-04-16 15:20:36 -07002593 do {
Christoph Hellwig9bff31322019-06-28 19:27:20 -07002594 if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
2595 continue;
2596
2597 lsn = be64_to_cpu(iclog->ic_header.h_lsn);
2598 if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599 lowest_lsn = lsn;
Christoph Hellwig9bff31322019-06-28 19:27:20 -07002600 } while ((iclog = iclog->ic_next) != log->l_iclog);
2601
Jesper Juhl014c2542006-01-15 02:37:08 +01002602 return lowest_lsn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603}
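/*
 * Note on the comparison above (a sketch of the LSN semantics): an LSN
 * packs {cycle, block}, and XFS_LSN_CMP() orders by cycle first, then
 * block, so for example xlog_assign_lsn(5, 100) sorts before
 * xlog_assign_lsn(6, 2). ACTIVE and DIRTY iclogs are skipped entirely
 * so their stale header LSNs cannot pollute the minimum.
 */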
2604
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605STATIC void
2606xlog_state_do_callback(
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05002607 struct xlog *log,
Christoph Hellwigd15cbf22019-06-28 19:27:30 -07002608 bool aborted,
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05002609 struct xlog_in_core *ciclog)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610{
2611 xlog_in_core_t *iclog;
2612 xlog_in_core_t *first_iclog; /* used to know when we've
2613 * processed all iclogs once */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 int flushcnt = 0;
2615 xfs_lsn_t lowest_lsn;
2616 int ioerrors; /* counter: iclogs with errors */
2617 int loopdidcallbacks; /* flag: inner loop did callbacks*/
2618 int funcdidcallbacks; /* flag: function did callbacks */
2619 int repeats; /* for issuing console warnings if
2620 * looping too many times */
Matthew Wilcoxd748c622008-05-19 16:34:27 +10002621 int wake = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002622
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002623 spin_lock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002624 first_iclog = iclog = log->l_iclog;
2625 ioerrors = 0;
2626 funcdidcallbacks = 0;
2627 repeats = 0;
2628
2629 do {
2630 /*
2631 * Scan all iclogs starting with the one pointed to by the
2632 * log. Reset this starting point each time the log is
2633 * unlocked (during callbacks).
2634 *
2635 * Keep looping through iclogs until one full pass is made
2636 * without running any callbacks.
2637 */
2638 first_iclog = log->l_iclog;
2639 iclog = log->l_iclog;
2640 loopdidcallbacks = 0;
2641 repeats++;
2642
2643 do {
2644
2645 /* skip all iclogs in the ACTIVE & DIRTY states */
2646 if (iclog->ic_state &
2647 (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY)) {
2648 iclog = iclog->ic_next;
2649 continue;
2650 }
2651
2652 /*
2653 * Between marking a filesystem SHUTDOWN and stopping
2654 * the log, we do flush all iclogs to disk (if there
2655 * wasn't a log I/O error). So, we do want things to
2656 * go smoothly in case of just a SHUTDOWN w/o a
2657 * LOG_IO_ERROR.
2658 */
2659 if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
2660 /*
2661 * Can only perform callbacks in order. Since
2662 * this iclog is not in the DONE_SYNC/
2663 * DO_CALLBACK state, we skip the rest and
2664 * just try to clean up. If we set our iclog
2665 * to DO_CALLBACK, we will not process it when
2666 * we retry since a previous iclog is in the
2667 * CALLBACK and the state cannot change since
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002668 * we are holding the l_icloglock.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002669 */
2670 if (!(iclog->ic_state &
2671 (XLOG_STATE_DONE_SYNC |
2672 XLOG_STATE_DO_CALLBACK))) {
2673 if (ciclog && (ciclog->ic_state ==
2674 XLOG_STATE_DONE_SYNC)) {
2675 ciclog->ic_state = XLOG_STATE_DO_CALLBACK;
2676 }
2677 break;
2678 }
2679 /*
2680 * We now have an iclog that is in either the
2681 * DO_CALLBACK or DONE_SYNC states. The other
 2682 * states (WANT_SYNC, SYNCING, or CALLBACK) were
 2683 * caught by the above if and are going to be
 2684 * cleaned (i.e. we aren't doing their callbacks);
 2685 * see the above if.
2686 */
2687
2688 /*
2689 * We will do one more check here to see if we
2690 * have chased our tail around.
2691 */
2692
2693 lowest_lsn = xlog_get_lowest_lsn(log);
Christoph Hellwigb53e6752007-10-12 10:59:34 +10002694 if (lowest_lsn &&
2695 XFS_LSN_CMP(lowest_lsn,
Dave Chinner84f3c682010-12-03 22:11:29 +11002696 be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002697 iclog = iclog->ic_next;
2698 continue; /* Leave this iclog for
2699 * another thread */
2700 }
2701
2702 iclog->ic_state = XLOG_STATE_CALLBACK;
2703
Linus Torvalds1da177e2005-04-16 15:20:36 -07002704
Dave Chinner84f3c682010-12-03 22:11:29 +11002705 /*
Dave Chinnerd35e88f2012-10-08 21:56:12 +11002706 * Completion of an iclog IO does not imply that
2707 * a transaction has completed, as transactions
2708 * can be large enough to span many iclogs. We
2709 * cannot change the tail of the log half way
2710 * through a transaction as this may be the only
 2711 * transaction in the log and moving the tail to
2712 * point to the middle of it will prevent
2713 * recovery from finding the start of the
2714 * transaction. Hence we should only update the
2715 * last_sync_lsn if this iclog contains
2716 * transaction completion callbacks on it.
2717 *
2718 * We have to do this before we drop the
Dave Chinner84f3c682010-12-03 22:11:29 +11002719 * icloglock to ensure we are the only one that
2720 * can update it.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721 */
Dave Chinner84f3c682010-12-03 22:11:29 +11002722 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
2723 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
Christoph Hellwig89ae3792019-06-28 19:27:34 -07002724 if (!list_empty_careful(&iclog->ic_callbacks))
Dave Chinnerd35e88f2012-10-08 21:56:12 +11002725 atomic64_set(&log->l_last_sync_lsn,
2726 be64_to_cpu(iclog->ic_header.h_lsn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002727
Dave Chinner84f3c682010-12-03 22:11:29 +11002728 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729 ioerrors++;
Dave Chinner84f3c682010-12-03 22:11:29 +11002730
2731 spin_unlock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002732
David Chinner114d23a2008-04-10 12:18:39 +10002733 /*
2734 * Keep processing entries in the callback list until
2735 * we come around and it is empty. We need to
2736 * atomically see that the list is empty and change the
2737 * state to DIRTY so that we don't miss any more
2738 * callbacks being added.
2739 */
2740 spin_lock(&iclog->ic_callback_lock);
Christoph Hellwig89ae3792019-06-28 19:27:34 -07002741 while (!list_empty(&iclog->ic_callbacks)) {
2742 LIST_HEAD(tmp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002743
Christoph Hellwig89ae3792019-06-28 19:27:34 -07002744 list_splice_init(&iclog->ic_callbacks, &tmp);
2745
2746 spin_unlock(&iclog->ic_callback_lock);
2747 xlog_cil_process_committed(&tmp, aborted);
David Chinner114d23a2008-04-10 12:18:39 +10002748 spin_lock(&iclog->ic_callback_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002749 }
2750
2751 loopdidcallbacks++;
2752 funcdidcallbacks++;
2753
David Chinner114d23a2008-04-10 12:18:39 +10002754 spin_lock(&log->l_icloglock);
David Chinner114d23a2008-04-10 12:18:39 +10002755 spin_unlock(&iclog->ic_callback_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002756 if (!(iclog->ic_state & XLOG_STATE_IOERROR))
2757 iclog->ic_state = XLOG_STATE_DIRTY;
2758
2759 /*
2760 * Transition from DIRTY to ACTIVE if applicable.
2761 * NOP if STATE_IOERROR.
2762 */
2763 xlog_state_clean_log(log);
2764
2765 /* wake up threads waiting in xfs_log_force() */
Dave Chinnereb40a872010-12-21 12:09:01 +11002766 wake_up_all(&iclog->ic_force_wait);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002767
2768 iclog = iclog->ic_next;
2769 } while (first_iclog != iclog);
Nathan Scotta3c6685e2006-09-28 11:02:14 +10002770
2771 if (repeats > 5000) {
2772 flushcnt += repeats;
2773 repeats = 0;
Dave Chinnera0fa2b62011-03-07 10:01:35 +11002774 xfs_warn(log->l_mp,
Nathan Scotta3c6685e2006-09-28 11:02:14 +10002775 "%s: possible infinite loop (%d iterations)",
Harvey Harrison34a622b2008-04-10 12:19:21 +10002776 __func__, flushcnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002777 }
2778 } while (!ioerrors && loopdidcallbacks);
2779
Linus Torvalds1da177e2005-04-16 15:20:36 -07002780#ifdef DEBUG
Brian Foster609adfc2016-01-05 07:41:16 +11002781 /*
2782 * Make one last gasp attempt to see if iclogs are being left in limbo.
2783 * If the above loop finds an iclog earlier than the current iclog and
2784 * in one of the syncing states, the current iclog is put into
2785 * DO_CALLBACK and the callbacks are deferred to the completion of the
2786 * earlier iclog. Walk the iclogs in order and make sure that no iclog
2787 * is in DO_CALLBACK unless an earlier iclog is in one of the syncing
2788 * states.
2789 *
2790 * Note that SYNCING|IOABORT is a valid state so we cannot just check
2791 * for ic_state == SYNCING.
2792 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002793 if (funcdidcallbacks) {
2794 first_iclog = iclog = log->l_iclog;
2795 do {
2796 ASSERT(iclog->ic_state != XLOG_STATE_DO_CALLBACK);
2797 /*
2798 * Terminate the loop if iclogs are found in states
2799 * which will cause other threads to clean up iclogs.
2800 *
2801 * SYNCING - i/o completion will go through logs
2802 * DONE_SYNC - interrupt thread should be waiting for
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002803 * l_icloglock
Linus Torvalds1da177e2005-04-16 15:20:36 -07002804 * IOERROR - give up hope all ye who enter here
2805 */
2806 if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
Brian Foster609adfc2016-01-05 07:41:16 +11002807 iclog->ic_state & XLOG_STATE_SYNCING ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07002808 iclog->ic_state == XLOG_STATE_DONE_SYNC ||
2809 iclog->ic_state == XLOG_STATE_IOERROR )
2810 break;
2811 iclog = iclog->ic_next;
2812 } while (first_iclog != iclog);
2813 }
2814#endif
2815
Matthew Wilcoxd748c622008-05-19 16:34:27 +10002816 if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
2817 wake = 1;
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002818 spin_unlock(&log->l_icloglock);
Matthew Wilcoxd748c622008-05-19 16:34:27 +10002819
2820 if (wake)
Dave Chinnereb40a872010-12-21 12:09:01 +11002821 wake_up_all(&log->l_flush_wait);
Matthew Wilcoxd748c622008-05-19 16:34:27 +10002822}
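/*
 * For orientation, the per-iclog lifecycle this function completes (a
 * sketch; the state definitions live in xfs_log_priv.h):
 *
 *	ACTIVE -> WANT_SYNC -> SYNCING -> DONE_SYNC
 *	       -> [DO_CALLBACK] -> CALLBACK -> DIRTY -> ACTIVE
 *
 * xlog_state_do_callback() picks iclogs up at DONE_SYNC/DO_CALLBACK,
 * runs their committed-item callbacks under CALLBACK, marks them
 * DIRTY, and xlog_state_clean_log() recycles them to ACTIVE in order.
 */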
Linus Torvalds1da177e2005-04-16 15:20:36 -07002823
2824
2825/*
2826 * Finish transitioning this iclog to the dirty state.
2827 *
2828 * Make sure that we completely execute this routine only when this is
2829 * the last call to the iclog. There is a good chance that iclog flushes,
2830 * when we reach the end of the physical log, get turned into 2 separate
2831 * calls to bwrite. Hence, one iclog flush could generate two calls to this
2832 * routine. By using the reference count bwritecnt, we guarantee that only
2833 * the second completion goes through.
2834 *
2835 * Callbacks could take time, so they are done outside the scope of the
David Chinner12017fa2008-08-13 16:34:31 +10002836 * global state machine log lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002837 */
David Chinnera8272ce2007-11-23 16:28:09 +11002838STATIC void
Linus Torvalds1da177e2005-04-16 15:20:36 -07002839xlog_state_done_syncing(
Christoph Hellwigd15cbf22019-06-28 19:27:30 -07002840 struct xlog_in_core *iclog,
2841 bool aborted)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002842{
Christoph Hellwigd15cbf22019-06-28 19:27:30 -07002843 struct xlog *log = iclog->ic_log;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002844
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002845 spin_lock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002846
2847 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
2848 iclog->ic_state == XLOG_STATE_IOERROR);
David Chinner155cc6b2008-03-06 13:44:14 +11002849 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002850
2851 /*
2852 * If we got an error, either on the first buffer, or in the case of
2853 * split log writes, on the second, we mark ALL iclogs STATE_IOERROR,
2854 * and none should ever be attempted to be written to disk
2855 * again.
2856 */
Christoph Hellwig79b54d92019-06-28 19:27:25 -07002857 if (iclog->ic_state != XLOG_STATE_IOERROR)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002858 iclog->ic_state = XLOG_STATE_DONE_SYNC;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002859
2860 /*
2861 * Someone could be sleeping prior to writing out the next
2862 * iclog buffer, we wake them all, one will get to do the
2863 * I/O, the others get to wait for the result.
2864 */
Dave Chinnereb40a872010-12-21 12:09:01 +11002865 wake_up_all(&iclog->ic_write_wait);
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002866 spin_unlock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002867 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
2868} /* xlog_state_done_syncing */
2869
2870
2871/*
2872 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
David Chinner12017fa2008-08-13 16:34:31 +10002873 * sleep. We wait on the flush queue on the head iclog as that should be
2874 * the first iclog to complete flushing. Hence if all iclogs are syncing,
2875 * we will wait here and all new writes will sleep until a sync completes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002876 *
2877 * The in-core logs are used in a circular fashion. They are not used
2878 * out-of-order even when an iclog past the head is free.
2879 *
2880 * return:
2881 * * log_offset where xlog_write() can start writing into the in-core
2882 * log's data space.
2883 * * in-core log pointer to which xlog_write() should write.
2884 * * boolean indicating this is a continued write to an in-core log.
2885 * If this is the last write, then the in-core log's offset field
2886 * needs to be incremented, depending on the amount of data which
2887 * is copied.
2888 */
David Chinnera8272ce2007-11-23 16:28:09 +11002889STATIC int
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05002890xlog_state_get_iclog_space(
2891 struct xlog *log,
2892 int len,
2893 struct xlog_in_core **iclogp,
2894 struct xlog_ticket *ticket,
2895 int *continued_write,
2896 int *logoffsetp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002897{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002898 int log_offset;
2899 xlog_rec_header_t *head;
2900 xlog_in_core_t *iclog;
2901 int error;
2902
2903restart:
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002904 spin_lock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002905 if (XLOG_FORCED_SHUTDOWN(log)) {
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002906 spin_unlock(&log->l_icloglock);
Dave Chinner24513372014-06-25 14:58:08 +10002907 return -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002908 }
2909
2910 iclog = log->l_iclog;
Matthew Wilcoxd748c622008-05-19 16:34:27 +10002911 if (iclog->ic_state != XLOG_STATE_ACTIVE) {
Bill O'Donnellff6d6af2015-10-12 18:21:22 +11002912 XFS_STATS_INC(log->l_mp, xs_log_noiclogs);
Matthew Wilcoxd748c622008-05-19 16:34:27 +10002913
2914 /* Wait for log writes to have flushed */
Dave Chinnereb40a872010-12-21 12:09:01 +11002915 xlog_wait(&log->l_flush_wait, &log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002916 goto restart;
2917 }
Matthew Wilcoxd748c622008-05-19 16:34:27 +10002918
Linus Torvalds1da177e2005-04-16 15:20:36 -07002919 head = &iclog->ic_header;
2920
David Chinner155cc6b2008-03-06 13:44:14 +11002921 atomic_inc(&iclog->ic_refcnt); /* prevents sync */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002922 log_offset = iclog->ic_offset;
2923
2924 /* On the 1st write to an iclog, figure out lsn. This works
2925 * if iclogs marked XLOG_STATE_WANT_SYNC always write out what they are
2926 * committing to. If the offset is set, that's how many blocks
2927 * must be written.
2928 */
2929 if (log_offset == 0) {
2930 ticket->t_curr_res -= log->l_iclog_hsize;
Christoph Hellwig0adba532007-08-30 17:21:46 +10002931 xlog_tic_add_region(ticket,
Tim Shimmin7e9c6392005-09-02 16:42:05 +10002932 log->l_iclog_hsize,
2933 XLOG_REG_TYPE_LRHEADER);
Christoph Hellwigb53e6752007-10-12 10:59:34 +10002934 head->h_cycle = cpu_to_be32(log->l_curr_cycle);
2935 head->h_lsn = cpu_to_be64(
Christoph Hellwig03bea6f2007-10-12 10:58:05 +10002936 xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002937 ASSERT(log->l_curr_block >= 0);
2938 }
2939
2940 /* If there is enough room to write everything, then do it. Otherwise,
2941 * claim the rest of the region and make sure the XLOG_STATE_WANT_SYNC
2942 * bit is on, so this will get flushed out. Don't update ic_offset
2943 * until you know exactly how many bytes get copied. Therefore, wait
2944 * until later to update ic_offset.
2945 *
2946 * xlog_write() algorithm assumes that at least 2 xlog_op_header_t's
2947 * can fit into remaining data section.
2948 */
2949 if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
2950 xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
2951
Dave Chinner49641f12008-07-11 17:43:55 +10002952 /*
2953 * If I'm the only one writing to this iclog, sync it to disk.
2954 * We need to do an atomic compare and decrement here to avoid
2955 * racing with concurrent atomic_dec_and_lock() calls in
2956 * xlog_state_release_iclog() when there is more than one
2957 * reference to the iclog.
2958 */
2959 if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) {
2960 /* we are the only one */
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002961 spin_unlock(&log->l_icloglock);
Dave Chinner49641f12008-07-11 17:43:55 +10002962 error = xlog_state_release_iclog(log, iclog);
2963 if (error)
Jesper Juhl014c2542006-01-15 02:37:08 +01002964 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002965 } else {
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002966 spin_unlock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002967 }
2968 goto restart;
2969 }
2970
2971 /* Do we have enough room to write the full amount in the remainder
2972 * of this iclog? Or must we continue a write on the next iclog and
2973 * mark this iclog as completely taken? In the case where we switch
2974 * iclogs (to mark it taken), this particular iclog will release/sync
2975 * to disk in xlog_write().
2976 */
2977 if (len <= iclog->ic_size - iclog->ic_offset) {
2978 *continued_write = 0;
2979 iclog->ic_offset += len;
2980 } else {
2981 *continued_write = 1;
2982 xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
2983 }
2984 *iclogp = iclog;
2985
2986 ASSERT(iclog->ic_offset <= iclog->ic_size);
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10002987 spin_unlock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002988
2989 *logoffsetp = log_offset;
2990 return 0;
2991} /* xlog_state_get_iclog_space */
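/*
 * Worked example for the switch heuristic above, with illustrative
 * numbers: take a 32k iclog (ic_size == 32768) whose ic_offset has
 * reached 32752. The 16 free bytes cannot hold two op headers
 * (2 * sizeof(xlog_op_header_t) == 24, assuming the 12 byte on-disk
 * header), so the iclog is switched to WANT_SYNC and we restart on the
 * next one. With 4k free and len == 1k we instead take the
 * *continued_write == 0 path and just bump ic_offset.
 */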
2992
Linus Torvalds1da177e2005-04-16 15:20:36 -07002993/* The first cnt-1 times through here we don't need to
2994 * move the grant write head because the permanent
2995 * reservation has reserved cnt times the unit amount.
2996 * Release part of current permanent unit reservation and
 2997 * reset current reservation to be one unit's worth. Also
2998 * move grant reservation head forward.
2999 */
3000STATIC void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05003001xlog_regrant_reserve_log_space(
3002 struct xlog *log,
3003 struct xlog_ticket *ticket)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003004{
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00003005 trace_xfs_log_regrant_reserve_enter(log, ticket);
3006
Linus Torvalds1da177e2005-04-16 15:20:36 -07003007 if (ticket->t_cnt > 0)
3008 ticket->t_cnt--;
3009
Christoph Hellwig28496962012-02-20 02:31:25 +00003010 xlog_grant_sub_space(log, &log->l_reserve_head.grant,
Dave Chinnera69ed032010-12-21 12:08:20 +11003011 ticket->t_curr_res);
Christoph Hellwig28496962012-02-20 02:31:25 +00003012 xlog_grant_sub_space(log, &log->l_write_head.grant,
Dave Chinnera69ed032010-12-21 12:08:20 +11003013 ticket->t_curr_res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003014 ticket->t_curr_res = ticket->t_unit_res;
Christoph Hellwig0adba532007-08-30 17:21:46 +10003015 xlog_tic_reset_res(ticket);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00003016
3017 trace_xfs_log_regrant_reserve_sub(log, ticket);
3018
Linus Torvalds1da177e2005-04-16 15:20:36 -07003019 /* just return if we still have some of the pre-reserved space */
Dave Chinnerd0eb2f32010-12-21 12:29:14 +11003020 if (ticket->t_cnt > 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003021 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003022
Christoph Hellwig28496962012-02-20 02:31:25 +00003023 xlog_grant_add_space(log, &log->l_reserve_head.grant,
Dave Chinnera69ed032010-12-21 12:08:20 +11003024 ticket->t_unit_res);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00003025
3026 trace_xfs_log_regrant_reserve_exit(log, ticket);
3027
Linus Torvalds1da177e2005-04-16 15:20:36 -07003028 ticket->t_curr_res = ticket->t_unit_res;
Christoph Hellwig0adba532007-08-30 17:21:46 +10003029 xlog_tic_reset_res(ticket);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003030} /* xlog_regrant_reserve_log_space */
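/*
 * Illustrative numbers for the accounting above (a sketch only): a
 * permanent ticket with t_unit_res == 64k, t_cnt == 3, and 40k of
 * t_curr_res left unused after the write. The unused 40k is handed
 * back to both grant heads, t_curr_res is reset to a full 64k unit,
 * and because t_cnt is now 2 (> 0) that refill is already covered by
 * the pre-reserved units. Only once t_cnt reaches 0 do we re-add a
 * full unit to the reserve head.
 */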
3031
3032
3033/*
3034 * Give back the space left from a reservation.
3035 *
3036 * All the information we need to make a correct determination of space left
3037 * is present. For non-permanent reservations, things are quite easy. The
3038 * count should have been decremented to zero. We only need to deal with the
3039 * space remaining in the current reservation part of the ticket. If the
3040 * ticket contains a permanent reservation, there may be left over space which
3041 * needs to be released. A count of N means that N-1 refills of the current
3042 * reservation can be done before we need to ask for more space. The first
3043 * one goes to fill up the first current reservation. Once we run out of
3044 * space, the count will stay at zero and the only space remaining will be
3045 * in the current reservation field.
3046 */
3047STATIC void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05003048xlog_ungrant_log_space(
3049 struct xlog *log,
3050 struct xlog_ticket *ticket)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003051{
Dave Chinner663e4962010-12-21 12:06:05 +11003052 int bytes;
3053
Linus Torvalds1da177e2005-04-16 15:20:36 -07003054 if (ticket->t_cnt > 0)
3055 ticket->t_cnt--;
3056
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00003057 trace_xfs_log_ungrant_enter(log, ticket);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00003058 trace_xfs_log_ungrant_sub(log, ticket);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003059
Dave Chinner663e4962010-12-21 12:06:05 +11003060 /*
3061 * If this is a permanent reservation ticket, we may be able to free
Linus Torvalds1da177e2005-04-16 15:20:36 -07003062 * up more space based on the remaining count.
3063 */
Dave Chinner663e4962010-12-21 12:06:05 +11003064 bytes = ticket->t_curr_res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003065 if (ticket->t_cnt > 0) {
3066 ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
Dave Chinner663e4962010-12-21 12:06:05 +11003067 bytes += ticket->t_unit_res*ticket->t_cnt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003068 }
3069
Christoph Hellwig28496962012-02-20 02:31:25 +00003070 xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
3071 xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
Dave Chinner663e4962010-12-21 12:06:05 +11003072
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00003073 trace_xfs_log_ungrant_exit(log, ticket);
3074
Christoph Hellwigcfb7cdc2012-02-20 02:31:23 +00003075 xfs_log_space_wake(log->l_mp);
Christoph Hellwig09a423a2012-02-20 02:31:20 +00003076}
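/*
 * A quick worked example of the arithmetic above (numbers are
 * illustrative): a permanent ticket with t_cnt == 2, t_unit_res == 64k
 * and t_curr_res == 10k. After the initial t_cnt-- one whole
 * unreleased unit remains, so
 *
 *	bytes = t_curr_res + t_cnt * t_unit_res = 10k + 1 * 64k = 74k
 *
 * is returned to both grant heads before waiters are woken.
 */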
Linus Torvalds1da177e2005-04-16 15:20:36 -07003077
3078/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07003079 * Flush iclog to disk if this is the last reference to the given iclog and
3080 * the WANT_SYNC bit is set.
3081 *
3082 * When this function is entered, the iclog is not necessarily in the
3083 * WANT_SYNC state. It may be sitting around waiting to get filled.
 3084 *
3086 */
David Chinnera8272ce2007-11-23 16:28:09 +11003087STATIC int
David Chinnerb5893342008-03-06 13:44:06 +11003088xlog_state_release_iclog(
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05003089 struct xlog *log,
3090 struct xlog_in_core *iclog)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003091{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003092 int sync = 0; /* do we sync? */
3093
David Chinner155cc6b2008-03-06 13:44:14 +11003094 if (iclog->ic_state & XLOG_STATE_IOERROR)
Dave Chinner24513372014-06-25 14:58:08 +10003095 return -EIO;
David Chinner155cc6b2008-03-06 13:44:14 +11003096
3097 ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
3098 if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
3099 return 0;
3100
Linus Torvalds1da177e2005-04-16 15:20:36 -07003101 if (iclog->ic_state & XLOG_STATE_IOERROR) {
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10003102 spin_unlock(&log->l_icloglock);
Dave Chinner24513372014-06-25 14:58:08 +10003103 return -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003104 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003105 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
3106 iclog->ic_state == XLOG_STATE_WANT_SYNC);
3107
David Chinner155cc6b2008-03-06 13:44:14 +11003108 if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
David Chinnerb5893342008-03-06 13:44:06 +11003109 /* update tail before writing to iclog */
Dave Chinner1c3cb9e2010-12-21 12:28:39 +11003110 xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003111 sync++;
3112 iclog->ic_state = XLOG_STATE_SYNCING;
Dave Chinner1c3cb9e2010-12-21 12:28:39 +11003113 iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
3114 xlog_verify_tail_lsn(log, iclog, tail_lsn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003115 /* cycle incremented when incrementing curr_block */
3116 }
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10003117 spin_unlock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003118
3119 /*
3120 * We let the log lock go, so it's possible that we hit a log I/O
Nathan Scottc41564b2006-03-29 08:55:14 +10003121 * error or some other SHUTDOWN condition that marks the iclog
Linus Torvalds1da177e2005-04-16 15:20:36 -07003122 * as XLOG_STATE_IOERROR before the bwrite. However, we know that
3123 * this iclog has consistent data, so we ignore IOERROR
3124 * flags after this point.
3125 */
David Chinnerb5893342008-03-06 13:44:06 +11003126 if (sync)
Christoph Hellwig94860a32019-06-28 19:27:22 -07003127 xlog_sync(log, iclog);
Jesper Juhl014c2542006-01-15 02:37:08 +01003128 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003129} /* xlog_state_release_iclog */
3130
3131
3132/*
3133 * This routine will mark the current iclog in the ring as WANT_SYNC
3134 * and move the current iclog pointer to the next iclog in the ring.
3135 * When this routine is called from xlog_state_get_iclog_space(), the
 3136 * exact size of the iclog has not yet been determined. All we know is
 3137 * that we have run out of space in this log record.
3138 */
3139STATIC void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05003140xlog_state_switch_iclogs(
3141 struct xlog *log,
3142 struct xlog_in_core *iclog,
3143 int eventual_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003144{
3145 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
3146 if (!eventual_size)
3147 eventual_size = iclog->ic_offset;
3148 iclog->ic_state = XLOG_STATE_WANT_SYNC;
Christoph Hellwigb53e6752007-10-12 10:59:34 +10003149 iclog->ic_header.h_prev_block = cpu_to_be32(log->l_prev_block);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003150 log->l_prev_block = log->l_curr_block;
3151 log->l_prev_cycle = log->l_curr_cycle;
3152
3153 /* roll log?: ic_offset changed later */
3154 log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize);
3155
3156 /* Round up to next log-sunit */
Eric Sandeen62118702008-03-06 13:44:28 +11003157 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07003158 log->l_mp->m_sb.sb_logsunit > 1) {
Darrick J. Wongc8ce5402017-06-16 11:00:05 -07003159 uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003160 log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
3161 }
3162
3163 if (log->l_curr_block >= log->l_logBBsize) {
Brian Fostera45086e2015-10-12 15:59:25 +11003164 /*
3165 * Rewind the current block before the cycle is bumped to make
3166 * sure that the combined LSN never transiently moves forward
3167 * when the log wraps to the next cycle. This is to support the
3168 * unlocked sample of these fields from xlog_valid_lsn(). Most
3169 * other cases should acquire l_icloglock.
3170 */
3171 log->l_curr_block -= log->l_logBBsize;
3172 ASSERT(log->l_curr_block >= 0);
3173 smp_wmb();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003174 log->l_curr_cycle++;
3175 if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
3176 log->l_curr_cycle++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003177 }
3178 ASSERT(iclog == log->l_iclog);
3179 log->l_iclog = iclog->ic_next;
3180} /* xlog_state_switch_iclogs */
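/*
 * Example of the stripe-unit roundup above (illustrative geometry):
 * with sb_logsunit == 32768 bytes, sunit_bb == BTOBB(32768) == 64
 * basic blocks, so l_curr_block == 1000 rounds up to 1024. The padding
 * is what the roundoff reservation in xfs_log_calc_unit_res() pays
 * for.
 */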
3181
Linus Torvalds1da177e2005-04-16 15:20:36 -07003182/*
3183 * Write out all data in the in-core log as of this exact moment in time.
3184 *
3185 * Data may be written to the in-core log during this call. However,
3186 * we don't guarantee this data will be written out. A change from past
3187 * implementation means this routine will *not* write out zero length LRs.
3188 *
3189 * Basically, we try and perform an intelligent scan of the in-core logs.
3190 * If we determine there is no flushable data, we just return. There is no
3191 * flushable data if:
3192 *
3193 * 1. the current iclog is active and has no data; the previous iclog
3194 * is in the active or dirty state.
 3195 * 2. the current iclog is dirty, and the previous iclog is in the
3196 * active or dirty state.
3197 *
David Chinner12017fa2008-08-13 16:34:31 +10003198 * We may sleep if:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003199 *
3200 * 1. the current iclog is not in the active nor dirty state.
 3201 * 2. the current iclog is dirty, and the previous iclog is not in the
3202 * active nor dirty state.
3203 * 3. the current iclog is active, and there is another thread writing
3204 * to this particular iclog.
3205 * 4. a) the current iclog is active and has no other writers
3206 * b) when we return from flushing out this iclog, it is still
3207 * not in the active nor dirty state.
3208 */
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003209int
Christoph Hellwig60e5bb72018-03-13 23:15:28 -07003210xfs_log_force(
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003211 struct xfs_mount *mp,
Christoph Hellwig60e5bb72018-03-13 23:15:28 -07003212 uint flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003213{
Mark Tinguelyad223e62012-06-14 09:22:15 -05003214 struct xlog *log = mp->m_log;
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003215 struct xlog_in_core *iclog;
3216 xfs_lsn_t lsn;
3217
Bill O'Donnellff6d6af2015-10-12 18:21:22 +11003218 XFS_STATS_INC(mp, xs_log_force);
Christoph Hellwig60e5bb72018-03-13 23:15:28 -07003219 trace_xfs_log_force(mp, 0, _RET_IP_);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003220
Christoph Hellwig93b8a582011-12-06 21:58:07 +00003221 xlog_cil_force(log);
Dave Chinner71e330b2010-05-21 14:37:18 +10003222
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10003223 spin_lock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003224 iclog = log->l_iclog;
Christoph Hellwige6b965702018-03-13 23:15:29 -07003225 if (iclog->ic_state & XLOG_STATE_IOERROR)
3226 goto out_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003227
Christoph Hellwige6b965702018-03-13 23:15:29 -07003228 if (iclog->ic_state == XLOG_STATE_DIRTY ||
3229 (iclog->ic_state == XLOG_STATE_ACTIVE &&
3230 atomic_read(&iclog->ic_refcnt) == 0 && iclog->ic_offset == 0)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003231 /*
Christoph Hellwige6b965702018-03-13 23:15:29 -07003232 * If the head is dirty or (active and empty), then we need to
3233 * look at the previous iclog.
3234 *
3235 * If the previous iclog is active or dirty we are done. There
3236 * is nothing to sync out. Otherwise, we attach ourselves to the
Linus Torvalds1da177e2005-04-16 15:20:36 -07003237 * previous iclog and go to sleep.
3238 */
Christoph Hellwige6b965702018-03-13 23:15:29 -07003239 iclog = iclog->ic_prev;
3240 if (iclog->ic_state == XLOG_STATE_ACTIVE ||
3241 iclog->ic_state == XLOG_STATE_DIRTY)
3242 goto out_unlock;
3243 } else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3244 if (atomic_read(&iclog->ic_refcnt) == 0) {
3245 /*
3246 * We are the only one with access to this iclog.
3247 *
3248 * Flush it out now. There should be a roundoff of zero
3249 * to show that someone has already taken care of the
3250 * roundoff from the previous sync.
3251 */
3252 atomic_inc(&iclog->ic_refcnt);
3253 lsn = be64_to_cpu(iclog->ic_header.h_lsn);
3254 xlog_state_switch_iclogs(log, iclog, 0);
Eric Sandeenb22cd72c2007-10-11 17:37:10 +10003255 spin_unlock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003256
Christoph Hellwige6b965702018-03-13 23:15:29 -07003257 if (xlog_state_release_iclog(log, iclog))
3258 return -EIO;
3259
3260 spin_lock(&log->l_icloglock);
3261 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn ||
3262 iclog->ic_state == XLOG_STATE_DIRTY)
3263 goto out_unlock;
3264 } else {
3265 /*
3266 * Someone else is writing to this iclog.
3267 *
3268 * Use its call to flush out the data. However, the
3269 * other thread may not force out this LR, so we mark
3270 * it WANT_SYNC.
3271 */
3272 xlog_state_switch_iclogs(log, iclog, 0);
3273 }
3274 } else {
3275 /*
3276 * If the head iclog is not active nor dirty, we just attach
3277 * ourselves to the head and go to sleep if necessary.
3278 */
3279 ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003280 }
Christoph Hellwige6b965702018-03-13 23:15:29 -07003281
3282 if (!(flags & XFS_LOG_SYNC))
3283 goto out_unlock;
3284
3285 if (iclog->ic_state & XLOG_STATE_IOERROR)
3286 goto out_error;
3287 XFS_STATS_INC(mp, xs_log_force_sleep);
3288 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3289 if (iclog->ic_state & XLOG_STATE_IOERROR)
3290 return -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003291 return 0;
Christoph Hellwige6b965702018-03-13 23:15:29 -07003292
3293out_unlock:
3294 spin_unlock(&log->l_icloglock);
3295 return 0;
3296out_error:
3297 spin_unlock(&log->l_icloglock);
3298 return -EIO;
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003299}
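/*
 * Typical use, sketched (hypothetical call sites, not from this file):
 *
 *	error = xfs_log_force(mp, XFS_LOG_SYNC);	push and wait
 *	xfs_log_force(mp, 0);				start I/O only
 *
 * With XFS_LOG_SYNC we sleep on ic_force_wait until the covering iclog
 * is written; without it the force only starts the flush.
 */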
Linus Torvalds1da177e2005-04-16 15:20:36 -07003300
Christoph Hellwig3e4da462018-03-13 23:15:30 -07003301static int
3302__xfs_log_force_lsn(
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003303 struct xfs_mount *mp,
3304 xfs_lsn_t lsn,
3305 uint flags,
Christoph Hellwig3e4da462018-03-13 23:15:30 -07003306 int *log_flushed,
3307 bool already_slept)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003308{
Mark Tinguelyad223e62012-06-14 09:22:15 -05003309 struct xlog *log = mp->m_log;
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003310 struct xlog_in_core *iclog;
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003311
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003312 spin_lock(&log->l_icloglock);
3313 iclog = log->l_iclog;
Christoph Hellwig93806292018-03-13 23:15:29 -07003314 if (iclog->ic_state & XLOG_STATE_IOERROR)
3315 goto out_error;
3316
3317 while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3318 iclog = iclog->ic_next;
3319 if (iclog == log->l_iclog)
3320 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003321 }
3322
Christoph Hellwig93806292018-03-13 23:15:29 -07003323 if (iclog->ic_state == XLOG_STATE_DIRTY)
3324 goto out_unlock;
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003325
Christoph Hellwig93806292018-03-13 23:15:29 -07003326 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3327 /*
3328 * We sleep here if we haven't already slept (e.g. this is the
3329 * first time we've looked at the correct iclog buf) and the
3330 * buffer before us is going to be sync'ed. The reason for this
3331 * is that if we are doing sync transactions here, by waiting
3332 * for the previous I/O to complete, we can allow a few more
3333 * transactions into this iclog before we close it down.
3334 *
3335 * Otherwise, we mark the buffer WANT_SYNC, and bump up the
3336 * refcnt so we can release the log (which drops the ref count).
3337 * The state switch keeps new transaction commits from using
3338 * this buffer. When the current commits finish writing into
3339 * the buffer, the refcount will drop to zero and the buffer
3340 * will go out then.
3341 */
3342 if (!already_slept &&
3343 (iclog->ic_prev->ic_state &
3344 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
3345 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003346
Bill O'Donnellff6d6af2015-10-12 18:21:22 +11003347 XFS_STATS_INC(mp, xs_log_force_sleep);
Christoph Hellwig93806292018-03-13 23:15:29 -07003348
3349 xlog_wait(&iclog->ic_prev->ic_write_wait,
3350 &log->l_icloglock);
Christoph Hellwig3e4da462018-03-13 23:15:30 -07003351 return -EAGAIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003352 }
Christoph Hellwig93806292018-03-13 23:15:29 -07003353 atomic_inc(&iclog->ic_refcnt);
3354 xlog_state_switch_iclogs(log, iclog, 0);
3355 spin_unlock(&log->l_icloglock);
3356 if (xlog_state_release_iclog(log, iclog))
3357 return -EIO;
3358 if (log_flushed)
3359 *log_flushed = 1;
3360 spin_lock(&log->l_icloglock);
3361 }
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003362
Christoph Hellwig93806292018-03-13 23:15:29 -07003363 if (!(flags & XFS_LOG_SYNC) ||
3364 (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)))
3365 goto out_unlock;
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003366
Christoph Hellwig93806292018-03-13 23:15:29 -07003367 if (iclog->ic_state & XLOG_STATE_IOERROR)
3368 goto out_error;
3369
3370 XFS_STATS_INC(mp, xs_log_force_sleep);
3371 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3372 if (iclog->ic_state & XLOG_STATE_IOERROR)
3373 return -EIO;
3374 return 0;
3375
3376out_unlock:
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003377 spin_unlock(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003378 return 0;
Christoph Hellwig93806292018-03-13 23:15:29 -07003379out_error:
3380 spin_unlock(&log->l_icloglock);
3381 return -EIO;
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003382}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003383
Christoph Hellwiga14a3482010-01-19 09:56:46 +00003384/*
Christoph Hellwig3e4da462018-03-13 23:15:30 -07003385 * Force the in-core log to disk for a specific LSN.
3386 *
3387 * Find in-core log with lsn.
3388 * If it is in the DIRTY state, just return.
3389 * If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
3390 * state and go to sleep or return.
3391 * If it is in any other state, go to sleep or return.
3392 *
3393 * Synchronous forces are implemented with a wait queue. All callers trying
3394 * to force a given lsn to disk must wait on the queue attached to the
3395 * specific in-core log. When given in-core log finally completes its write
3396 * to disk, that thread will wake up all threads waiting on the queue.
3397 */
3398int
3399xfs_log_force_lsn(
3400 struct xfs_mount *mp,
3401 xfs_lsn_t lsn,
3402 uint flags,
3403 int *log_flushed)
3404{
3405 int ret;
3406 ASSERT(lsn != 0);
3407
3408 XFS_STATS_INC(mp, xs_log_force);
3409 trace_xfs_log_force(mp, lsn, _RET_IP_);
3410
3411 lsn = xlog_cil_force_lsn(mp->m_log, lsn);
3412 if (lsn == NULLCOMMITLSN)
3413 return 0;
3414
3415 ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
3416 if (ret == -EAGAIN)
3417 ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
3418 return ret;
3419}
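/*
 * Sketch of a caller (hypothetical; fsync-style paths work roughly
 * like this): record the commit LSN of the last transaction that
 * touched an object, then force only that much of the log:
 *
 *	error = xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
 *				  XFS_LOG_SYNC, &log_flushed);
 *
 * The CIL push may translate the target into a checkpoint commit LSN,
 * and NULLCOMMITLSN from that push means there is nothing left to
 * force.
 */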
3420
3421/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07003422 * Called when we want to mark the current iclog as being ready to sync to
3423 * disk.
3424 */
David Chinnera8272ce2007-11-23 16:28:09 +11003425STATIC void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05003426xlog_state_want_sync(
3427 struct xlog *log,
3428 struct xlog_in_core *iclog)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003429{
Christoph Hellwiga8914f32009-08-10 11:32:44 -03003430 assert_spin_locked(&log->l_icloglock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003431
3432 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3433 xlog_state_switch_iclogs(log, iclog, 0);
3434 } else {
3435 ASSERT(iclog->ic_state &
3436 (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR));
3437 }
Christoph Hellwig39e2def2008-12-03 12:20:28 +01003438}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003439
3440
3441/*****************************************************************************
3442 *
3443 * TICKET functions
3444 *
3445 *****************************************************************************
3446 */
3447
3448/*
Malcolm Parsons9da096f2009-03-29 09:55:42 +02003449 * Free a used ticket when its refcount falls to zero.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003450 */
Dave Chinnercc09c0d2008-11-17 17:37:10 +11003451void
3452xfs_log_ticket_put(
3453 xlog_ticket_t *ticket)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003454{
Dave Chinnercc09c0d2008-11-17 17:37:10 +11003455 ASSERT(atomic_read(&ticket->t_ref) > 0);
Dave Chinnereb40a872010-12-21 12:09:01 +11003456 if (atomic_dec_and_test(&ticket->t_ref))
Dave Chinnercc09c0d2008-11-17 17:37:10 +11003457 kmem_zone_free(xfs_log_ticket_zone, ticket);
Dave Chinnercc09c0d2008-11-17 17:37:10 +11003458}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003459
Dave Chinnercc09c0d2008-11-17 17:37:10 +11003460xlog_ticket_t *
3461xfs_log_ticket_get(
3462 xlog_ticket_t *ticket)
3463{
3464 ASSERT(atomic_read(&ticket->t_ref) > 0);
3465 atomic_inc(&ticket->t_ref);
3466 return ticket;
3467}
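/*
 * Reference pattern, sketched (hypothetical caller): a context that
 * keeps a ticket beyond the original owner takes its own reference:
 *
 *	tic = xfs_log_ticket_get(tic);	+1 before handing off
 *	...
 *	xfs_log_ticket_put(tic);	-1, freed when it hits zero
 *
 * The ticket starts life with a single reference from
 * xlog_ticket_alloc().
 */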
Linus Torvalds1da177e2005-04-16 15:20:36 -07003468
3469/*
Jie Liue773fc92013-08-12 20:50:01 +10003470 * Figure out the total log space unit (in bytes) that would be
3471 * required for a log ticket.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003472 */
Jie Liue773fc92013-08-12 20:50:01 +10003473int
3474xfs_log_calc_unit_res(
3475 struct xfs_mount *mp,
3476 int unit_bytes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003477{
Jie Liue773fc92013-08-12 20:50:01 +10003478 struct xlog *log = mp->m_log;
3479 int iclog_space;
3480 uint num_headers;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003481
3482 /*
3483 * Permanent reservations have up to 'cnt'-1 active log operations
3484 * in the log. A unit in this case is the amount of space for one
3485 * of these log operations. Normal reservations have a cnt of 1
3486 * and their unit amount is the total amount of space required.
3487 *
3488 * The following lines of code account for non-transaction data
Tim Shimmin32fb9b52005-09-02 16:41:43 +10003489 * which occupies space in the on-disk log.
3490 *
3491 * Normal form of a transaction is:
3492 * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
3493 * and then there are LR hdrs, split-recs and roundoff at end of syncs.
3494 *
3495 * We need to account for all the leadup data and trailer data
3496 * around the transaction data.
3497 * And then we need to account for the worst case in terms of using
3498 * more space.
3499 * The worst case will happen if:
3500 * - the placement of the transaction happens to be such that the
3501 * roundoff is at its maximum
3502 * - the transaction data is synced before the commit record is synced
3503 * i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
3504 * Therefore the commit record is in its own Log Record.
3505 * This can happen as the commit record is called with its
3506 * own region to xlog_write().
3507 * This then means that in the worst case, roundoff can happen for
3508 * the commit-rec as well.
3509 * The commit-rec is smaller than padding in this scenario and so it is
3510 * not added separately.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003511 */
3512
Tim Shimmin32fb9b52005-09-02 16:41:43 +10003513 /* for trans header */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003514 unit_bytes += sizeof(xlog_op_header_t);
Tim Shimmin32fb9b52005-09-02 16:41:43 +10003515 unit_bytes += sizeof(xfs_trans_header_t);
3516
3517 /* for start-rec */
3518 unit_bytes += sizeof(xlog_op_header_t);
3519
Dave Chinner9b9fc2b72010-03-23 11:21:11 +11003520 /*
3521 * for LR headers - the space for data in an iclog is the size minus
3522 * the space used for the headers. If we use the iclog size, then we
3523 * undercalculate the number of headers required.
3524 *
3525 * Furthermore - the addition of op headers for split-recs might
3526 * increase the space required enough to require more log and op
3527 * headers, so take that into account too.
3528 *
3529 * IMPORTANT: This reservation makes the assumption that if this
3530 * transaction is the first in an iclog and hence has the LR headers
3531 * accounted to it, then the remaining space in the iclog is
3532 * exclusively for this transaction. i.e. if the transaction is larger
3533 * than the iclog, it will be the only thing in that iclog.
3534 * Fundamentally, this means we must pass the entire log vector to
3535 * xlog_write to guarantee this.
3536 */
3537 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
3538 num_headers = howmany(unit_bytes, iclog_space);
3539
3540 /* for split-recs - ophdrs added when data split over LRs */
3541 unit_bytes += sizeof(xlog_op_header_t) * num_headers;
3542
3543 /* add extra header reservations if we overrun */
3544 while (!num_headers ||
3545 howmany(unit_bytes, iclog_space) > num_headers) {
3546 unit_bytes += sizeof(xlog_op_header_t);
3547 num_headers++;
3548 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003549 unit_bytes += log->l_iclog_hsize * num_headers;
3550
Tim Shimmin32fb9b52005-09-02 16:41:43 +10003551 /* for commit-rec LR header - note: padding will subsume the ophdr */
3552 unit_bytes += log->l_iclog_hsize;
3553
Tim Shimmin32fb9b52005-09-02 16:41:43 +10003554 /* for roundoff padding for transaction data and one for commit record */
Jie Liue773fc92013-08-12 20:50:01 +10003555 if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) {
Tim Shimmin32fb9b52005-09-02 16:41:43 +10003556 /* log su roundoff */
Jie Liue773fc92013-08-12 20:50:01 +10003557 unit_bytes += 2 * mp->m_sb.sb_logsunit;
Tim Shimmin32fb9b52005-09-02 16:41:43 +10003558 } else {
3559 /* BB roundoff */
Jie Liue773fc92013-08-12 20:50:01 +10003560 unit_bytes += 2 * BBSIZE;
Tim Shimmin32fb9b52005-09-02 16:41:43 +10003561 }
3562
Jie Liue773fc92013-08-12 20:50:01 +10003563 return unit_bytes;
3564}
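/*
 * Worked example with illustrative numbers: a 100000 byte unit on a
 * log with 32k iclogs and 512 byte iclog headers gives iclog_space ==
 * 32256 and num_headers == howmany(100000, 32256) == 4. We then add
 * four split-rec op headers, recheck that the op-header growth did not
 * push us over four iclogs, add 4 * 512 bytes of LR headers, one more
 * LR header for the commit record, and 2 * BBSIZE (or two log stripe
 * units on a v2 log) of worst-case roundoff.
 */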
3565
3566/*
3567 * Allocate and initialise a new log ticket.
3568 */
3569struct xlog_ticket *
3570xlog_ticket_alloc(
3571 struct xlog *log,
3572 int unit_bytes,
3573 int cnt,
3574 char client,
3575 bool permanent,
3576 xfs_km_flags_t alloc_flags)
3577{
3578 struct xlog_ticket *tic;
3579 int unit_res;
3580
3581 tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
3582 if (!tic)
3583 return NULL;
3584
3585 unit_res = xfs_log_calc_unit_res(log->l_mp, unit_bytes);
3586
Dave Chinnercc09c0d2008-11-17 17:37:10 +11003587 atomic_set(&tic->t_ref, 1);
Christoph Hellwig14a7235f2012-02-20 02:31:24 +00003588 tic->t_task = current;
Dave Chinner10547942010-12-21 12:02:25 +11003589 INIT_LIST_HEAD(&tic->t_queue);
Jie Liue773fc92013-08-12 20:50:01 +10003590 tic->t_unit_res = unit_res;
3591 tic->t_curr_res = unit_res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003592 tic->t_cnt = cnt;
3593 tic->t_ocnt = cnt;
Akinobu Mitaecb34032013-03-04 21:58:20 +09003594 tic->t_tid = prandom_u32();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003595 tic->t_clientid = client;
3596 tic->t_flags = XLOG_TIC_INITED;
Christoph Hellwig9006fb92012-02-20 02:31:31 +00003597 if (permanent)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003598 tic->t_flags |= XLOG_TIC_PERM_RESERV;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003599
Christoph Hellwig0adba532007-08-30 17:21:46 +10003600 xlog_tic_reset_res(tic);
Tim Shimmin7e9c6392005-09-02 16:42:05 +10003601
Linus Torvalds1da177e2005-04-16 15:20:36 -07003602 return tic;
Dave Chinnercc09c0d2008-11-17 17:37:10 +11003603}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003604
3605
3606/******************************************************************************
3607 *
3608 * Log debug routines
3609 *
3610 ******************************************************************************
3611 */
Nathan Scottcfcbbbd2005-11-02 15:12:04 +11003612#if defined(DEBUG)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003613/*
3614 * Make sure that the destination ptr is within the valid data region of
3615 * one of the iclogs. This uses backup pointers stored in a different
3616 * part of the log in case we trash the log structure.
3617 */
Christoph Hellwig181fdfe2017-11-06 11:54:02 -08003618STATIC void
Christoph Hellwige6b1f272010-03-23 11:47:38 +11003619xlog_verify_dest_ptr(
Mark Tinguelyad223e62012-06-14 09:22:15 -05003620 struct xlog *log,
Christoph Hellwig5809d5e2015-06-22 09:44:47 +10003621 void *ptr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003622{
3623 int i;
3624 int good_ptr = 0;
3625
Christoph Hellwige6b1f272010-03-23 11:47:38 +11003626 for (i = 0; i < log->l_iclog_bufs; i++) {
3627 if (ptr >= log->l_iclog_bak[i] &&
3628 ptr <= log->l_iclog_bak[i] + log->l_iclog_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003629 good_ptr++;
3630 }
Christoph Hellwige6b1f272010-03-23 11:47:38 +11003631
3632 if (!good_ptr)
Dave Chinnera0fa2b62011-03-07 10:01:35 +11003633 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
Christoph Hellwige6b1f272010-03-23 11:47:38 +11003634}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003635
Dave Chinnerda8a1a42011-04-08 12:45:07 +10003636/*
 3637 * Check to make sure the grant write head didn't just overlap the tail. If
3638 * the cycles are the same, we can't be overlapping. Otherwise, make sure that
3639 * the cycles differ by exactly one and check the byte count.
3640 *
3641 * This check is run unlocked, so can give false positives. Rather than assert
3642 * on failures, use a warn-once flag and a panic tag to allow the admin to
3643 * determine if they want to panic the machine when such an error occurs. For
 3644 * debug kernels this will have the same effect as using an assert but, unlike
3645 * an assert, it can be turned off at runtime.
3646 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003647STATIC void
Dave Chinner3f336c62010-12-21 12:02:52 +11003648xlog_verify_grant_tail(
Mark Tinguelyad223e62012-06-14 09:22:15 -05003649 struct xlog *log)
Dave Chinner3f336c62010-12-21 12:02:52 +11003650{
Dave Chinner1c3cb9e2010-12-21 12:28:39 +11003651 int tail_cycle, tail_blocks;
Dave Chinnera69ed032010-12-21 12:08:20 +11003652 int cycle, space;
Dave Chinner3f336c62010-12-21 12:02:52 +11003653
Christoph Hellwig28496962012-02-20 02:31:25 +00003654 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
Dave Chinner1c3cb9e2010-12-21 12:28:39 +11003655 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
3656 if (tail_cycle != cycle) {
Dave Chinnerda8a1a42011-04-08 12:45:07 +10003657 if (cycle - 1 != tail_cycle &&
3658 !(log->l_flags & XLOG_TAIL_WARN)) {
3659 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
3660 "%s: cycle - 1 != tail_cycle", __func__);
3661 log->l_flags |= XLOG_TAIL_WARN;
3662 }
3663
3664 if (space > BBTOB(tail_blocks) &&
3665 !(log->l_flags & XLOG_TAIL_WARN)) {
3666 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
3667 "%s: space > BBTOB(tail_blocks)", __func__);
3668 log->l_flags |= XLOG_TAIL_WARN;
3669 }
Dave Chinner3f336c62010-12-21 12:02:52 +11003670 }
3671}
3672
Linus Torvalds1da177e2005-04-16 15:20:36 -07003673/* check if it will fit */
3674STATIC void
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05003675xlog_verify_tail_lsn(
3676 struct xlog *log,
3677 struct xlog_in_core *iclog,
3678 xfs_lsn_t tail_lsn)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003679{
3680 int blocks;
3681
3682 if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
3683 blocks =
3684 log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn));
3685 if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize))
Dave Chinnera0fa2b62011-03-07 10:01:35 +11003686 xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003687 } else {
3688 ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle);
3689
3690 if (BLOCK_LSN(tail_lsn) == log->l_prev_block)
Dave Chinnera0fa2b62011-03-07 10:01:35 +11003691 xfs_emerg(log->l_mp, "%s: tail wrapped", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003692
3693 blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block;
3694 if (blocks < BTOBB(iclog->ic_offset) + 1)
Dave Chinnera0fa2b62011-03-07 10:01:35 +11003695 xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003696 }
3697} /* xlog_verify_tail_lsn */

/*
 * Perform a number of checks on the iclog before writing to disk.
 *
 * 1. Make sure the iclogs are still circular
 * 2. Make sure we have a good magic number
 * 3. Make sure we don't have magic numbers in the data
 * 4. Check fields of each log operation header for:
 *	A. Valid client identifier
 *	B. tid ptr value falls in valid ptr space (user space code)
 *	C. Length in log record header is correct according to the
 *	   individual operation headers within record.
 * 5. When a bwrite will occur within 5 blocks of the front of the physical
 *    log, check the preceding blocks of the physical log to make sure all
 *    the cycle numbers agree with the current cycle number.
 */
STATIC void
xlog_verify_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			count)
{
	xlog_op_header_t	*ophead;
	xlog_in_core_t		*icptr;
	xlog_in_core_2_t	*xhdr;
	void			*base_ptr, *ptr, *p;
	ptrdiff_t		field_offset;
	uint8_t			clientid;
	int			len, i, j, k, op_len;
	int			idx;

	/* check validity of iclog pointers */
	spin_lock(&log->l_icloglock);
	icptr = log->l_iclog;
	for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next)
		ASSERT(icptr);

	if (icptr != log->l_iclog)
		xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);
	spin_unlock(&log->l_icloglock);

	/* check log magic numbers */
	if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
		xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);

	base_ptr = ptr = &iclog->ic_header;
	p = &iclog->ic_header;
	for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) {
		if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
			xfs_emerg(log->l_mp, "%s: unexpected magic num",
				__func__);
	}

	/* check fields */
	len = be32_to_cpu(iclog->ic_header.h_num_logops);
	base_ptr = ptr = iclog->ic_datap;
	ophead = ptr;
	xhdr = iclog->ic_data;
	for (i = 0; i < len; i++) {
		ophead = ptr;

		/* clientid is only 1 byte */
		p = &ophead->oh_clientid;
		field_offset = p - base_ptr;
		if (field_offset & 0x1ff) {
			clientid = ophead->oh_clientid;
		} else {
			idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
			if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
				j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
				k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
				clientid = xlog_get_client_id(
					xhdr[j].hic_xheader.xh_cycle_data[k]);
			} else {
				clientid = xlog_get_client_id(
					iclog->ic_header.h_cycle_data[idx]);
			}
		}
		if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
			xfs_warn(log->l_mp,
				"%s: invalid clientid %d op "PTR_FMT" offset 0x%lx",
				__func__, clientid, ophead,
				(unsigned long)field_offset);

		/* check length */
		p = &ophead->oh_len;
		field_offset = p - base_ptr;
		if (field_offset & 0x1ff) {
			op_len = be32_to_cpu(ophead->oh_len);
		} else {
			idx = BTOBBT((uintptr_t)&ophead->oh_len -
				    (uintptr_t)iclog->ic_datap);
			if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
				j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
				k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
				op_len = be32_to_cpu(xhdr[j].hic_xheader.xh_cycle_data[k]);
			} else {
				op_len = be32_to_cpu(iclog->ic_header.h_cycle_data[idx]);
			}
		}
		ptr += sizeof(xlog_op_header_t) + op_len;
	}
}	/* xlog_verify_iclog */
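
/*
 * Illustrative sketch, not part of the original file: before an iclog is
 * written, the first four bytes of every 512-byte sector are replaced with
 * the record's cycle number, and the displaced words are stashed in the
 * record header's h_cycle_data[] (spilling into extended headers for large
 * records). The (field_offset & 0x1ff) tests above detect fields that land
 * on a sector boundary and so must be read back from the stash. A
 * user-space model of that index arithmetic, where cycle_data[0] stands for
 * the primary header's array and cycle_data[1..] for the extended headers':
 */
#if 0
#include <stdint.h>

#define DEMO_BBSIZE		512			/* sector size */
#define DEMO_CYCLE_SIZE		(32 * 1024)	/* XLOG_HEADER_CYCLE_SIZE */
#define DEMO_SLOTS		(DEMO_CYCLE_SIZE / DEMO_BBSIZE)

/* Fetch the stashed word for the sector containing byte 'offset'. */
static uint32_t demo_stashed_word(uint32_t (*cycle_data)[DEMO_SLOTS],
				  long offset)
{
	int	idx = offset / DEMO_BBSIZE;	/* sector index in record */
	int	j = idx / DEMO_SLOTS;		/* which header (0 = primary) */
	int	k = idx % DEMO_SLOTS;		/* slot within that header */

	return cycle_data[j][k];
}
#endif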
#endif	/* DEBUG */

/*
 * Mark all iclogs IOERROR. l_icloglock is held by the caller.
 */
STATIC int
xlog_state_ioerror(
	struct xlog	*log)
{
	xlog_in_core_t	*iclog, *ic;

	iclog = log->l_iclog;
	if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
		/*
		 * Mark all the incore logs IOERROR.
		 * From now on, no log flushes will result.
		 */
		ic = iclog;
		do {
			ic->ic_state = XLOG_STATE_IOERROR;
			ic = ic->ic_next;
		} while (ic != iclog);
		return 0;
	}
	/*
	 * Return non-zero if the state transition has already happened.
	 */
	return 1;
}

/*
 * This is called from xfs_force_shutdown, when we're forcibly
 * shutting down the filesystem, typically because of an IO error.
 * Our main objectives here are to make sure that:
 *	a. if !logerror, flush the logs to disk. Anything modified
 *	   after this is ignored.
 *	b. the filesystem gets marked 'SHUTDOWN' for all interested
 *	   parties to find out, 'atomically'.
 *	c. those who're sleeping on log reservations, pinned objects and
 *	   other resources get woken up, and are told the bad news.
 *	d. nothing new gets queued up after (b) and (c) are done.
 *
 * Note: for the !logerror case we need to flush the regions held in memory out
 * to disk first. This needs to be done before the log is marked as shutdown,
 * otherwise the iclog writes will fail.
 */
int
xfs_log_force_umount(
	struct xfs_mount	*mp,
	int			logerror)
{
	struct xlog	*log;
	int		retval;

	log = mp->m_log;

	/*
	 * If this happens during log recovery, don't worry about
	 * locking; the log isn't open for business yet.
	 */
	if (!log ||
	    log->l_flags & XLOG_ACTIVE_RECOVERY) {
		mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
		if (mp->m_sb_bp)
			mp->m_sb_bp->b_flags |= XBF_DONE;
		return 0;
	}

	/*
	 * Somebody could've already done the hard work for us.
	 * No need to get locks for this.
	 */
	if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) {
		ASSERT(XLOG_FORCED_SHUTDOWN(log));
		return 1;
	}

	/*
	 * Flush all the completed transactions to disk before marking the log
	 * as being shut down. We need to do it in this order to ensure that
	 * completed operations are safely on disk before we shut down, and
	 * that we don't have to issue any buffer IO after the shutdown flags
	 * are set to guarantee this.
	 */
	if (!logerror)
		xfs_log_force(mp, XFS_LOG_SYNC);

	/*
	 * Mark the filesystem and the log as being in a shutdown state and
	 * wake everybody up to tell them the bad news.
	 */
	spin_lock(&log->l_icloglock);
	mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
	if (mp->m_sb_bp)
		mp->m_sb_bp->b_flags |= XBF_DONE;

	/*
	 * Mark the log and the iclogs with IO error flags to prevent any
	 * further log IO from being issued or completed.
	 */
	log->l_flags |= XLOG_IO_ERROR;
	retval = xlog_state_ioerror(log);
	spin_unlock(&log->l_icloglock);

	/*
	 * We don't want anybody waiting for log reservations after this. That
	 * means we have to wake up everybody queued up on reserveq as well as
	 * writeq. In addition, we make sure in xlog_{re}grant_log_space that
	 * we don't enqueue anything once the SHUTDOWN flag is set, and this
	 * action is protected by the grant locks.
	 */
	xlog_grant_head_wake_all(&log->l_reserve_head);
	xlog_grant_head_wake_all(&log->l_write_head);

	/*
	 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
	 * as if the log writes were completed. The abort handling in the log
	 * item committed callback functions will do this again under lock to
	 * avoid races.
	 */
	wake_up_all(&log->l_cilp->xc_commit_wait);
	xlog_state_do_callback(log, true, NULL);

#ifdef XFSERRORDEBUG
	{
		xlog_in_core_t	*iclog;

		spin_lock(&log->l_icloglock);
		iclog = log->l_iclog;
		do {
			ASSERT(iclog->ic_callback == 0);
			iclog = iclog->ic_next;
		} while (iclog != log->l_iclog);
		spin_unlock(&log->l_icloglock);
	}
#endif
	/* return non-zero if log IOERROR transition had already happened */
	return retval;
}
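
/*
 * Illustrative sketch, not part of the original file: xfs_log_force_umount()
 * is reached via the xfs_force_shutdown() path. A minimal model of the two
 * flavours discussed above, assuming the shutdown flag names from
 * xfs_mount.h:
 */
#if 0
static void demo_shutdown(struct xfs_mount *mp, bool log_io_failed)
{
	if (log_io_failed)
		/* logerror case: the log itself is bad, don't flush it */
		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
	else
		/* !logerror case: completed transactions get flushed first */
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
}
#endif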

STATIC int
xlog_iclogs_empty(
	struct xlog	*log)
{
	xlog_in_core_t	*iclog;

	iclog = log->l_iclog;
	do {
		/* endianness does not matter here, zero is zero in
		 * any language.
		 */
		if (iclog->ic_header.h_num_logops)
			return 0;
		iclog = iclog->ic_next;
	} while (iclog != log->l_iclog);
	return 1;
}
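
/*
 * Illustrative sketch, not part of the original file: the iclogs form a
 * singly linked circular ring, so the do/while idiom above visits every
 * element exactly once and terminates even for a one-element ring. A
 * minimal user-space model of the same walk:
 */
#if 0
struct demo_ring {
	int			payload;
	struct demo_ring	*next;	/* last element points back to head */
};

static int demo_ring_all_zero(struct demo_ring *head)
{
	struct demo_ring	*r = head;

	do {
		if (r->payload)
			return 0;
		r = r->next;
	} while (r != head);
	return 1;
}
#endif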

/*
 * Verify that an LSN stamped into a piece of metadata is valid. This is
 * intended for use in read verifiers on v5 superblocks.
 */
bool
xfs_log_check_lsn(
	struct xfs_mount	*mp,
	xfs_lsn_t		lsn)
{
	struct xlog		*log = mp->m_log;
	bool			valid;

	/*
	 * norecovery mode skips mount-time log processing and unconditionally
	 * resets the in-core LSN. We can't validate in this mode, but
	 * modifications are not allowed anyway so just return true.
	 */
	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
		return true;

	/*
	 * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
	 * handled by recovery and thus safe to ignore here.
	 */
	if (lsn == NULLCOMMITLSN)
		return true;

	valid = xlog_valid_lsn(mp->m_log, lsn);

	/* warn the user about what's gone wrong before verifier failure */
	if (!valid) {
		spin_lock(&log->l_icloglock);
		xfs_warn(mp,
"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
"Please unmount and run xfs_repair (>= v4.3) to resolve.",
			 CYCLE_LSN(lsn), BLOCK_LSN(lsn),
			 log->l_curr_cycle, log->l_curr_block);
		spin_unlock(&log->l_icloglock);
	}

	return valid;
}
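
/*
 * Illustrative sketch, not part of the original file: how a v5 buffer read
 * verifier might use xfs_log_check_lsn(). The structure and field names
 * here (xfs_demo_hdr, hdr->lsn, xfs_demo_read_verify) are hypothetical
 * stand-ins; only xfs_log_check_lsn() itself comes from this file.
 */
#if 0
static void
xfs_demo_read_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_demo_hdr	*hdr = bp->b_addr;

	/* an LSN ahead of the current head means stale or torn metadata */
	if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr->lsn)))
		xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
}
#endif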

bool
xfs_log_in_recovery(
	struct xfs_mount	*mp)
{
	struct xlog	*log = mp->m_log;

	return log->l_flags & XLOG_ACTIVE_RECOVERY;
}