blob: 2afb77051cce5dbaba4fc73e3d9a1105732c9aed [file] [log] [blame]
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
12
13#include "dlm_internal.h"
14#include "lockspace.h"
15#include "member.h"
16#include "recoverd.h"
17#include "recover.h"
David Teiglande7fd4172006-01-18 09:30:29 +000018#include "rcom.h"
19#include "config.h"
Christine Caulfield391fbdc2009-05-07 10:54:16 -050020#include "lowcomms.h"
David Teiglande7fd4172006-01-18 09:30:29 +000021
David Teiglande7fd4172006-01-18 09:30:29 +000022static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
23{
24 struct dlm_member *memb = NULL;
25 struct list_head *tmp;
26 struct list_head *newlist = &new->list;
27 struct list_head *head = &ls->ls_nodes;
28
29 list_for_each(tmp, head) {
30 memb = list_entry(tmp, struct dlm_member, list);
31 if (new->nodeid < memb->nodeid)
32 break;
33 }
34
35 if (!memb)
36 list_add_tail(newlist, head);
37 else {
38 /* FIXME: can use list macro here */
39 newlist->prev = tmp->prev;
40 newlist->next = tmp;
41 tmp->prev->next = newlist;
42 tmp->prev = newlist;
43 }
44}
45
46static int dlm_add_member(struct dlm_ls *ls, int nodeid)
47{
48 struct dlm_member *memb;
Christine Caulfield391fbdc2009-05-07 10:54:16 -050049 int w, error;
David Teiglande7fd4172006-01-18 09:30:29 +000050
David Teigland90135922006-01-20 08:47:07 +000051 memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
David Teiglande7fd4172006-01-18 09:30:29 +000052 if (!memb)
53 return -ENOMEM;
54
55 w = dlm_node_weight(ls->ls_name, nodeid);
Jesper Juhl1a2bf2e2007-07-19 00:27:43 +020056 if (w < 0) {
57 kfree(memb);
David Teiglande7fd4172006-01-18 09:30:29 +000058 return w;
Jesper Juhl1a2bf2e2007-07-19 00:27:43 +020059 }
David Teiglande7fd4172006-01-18 09:30:29 +000060
Christine Caulfield391fbdc2009-05-07 10:54:16 -050061 error = dlm_lowcomms_connect_node(nodeid);
62 if (error < 0) {
63 kfree(memb);
64 return error;
65 }
66
David Teiglande7fd4172006-01-18 09:30:29 +000067 memb->nodeid = nodeid;
68 memb->weight = w;
69 add_ordered_member(ls, memb);
70 ls->ls_num_nodes++;
71 return 0;
72}
73
74static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb)
75{
76 list_move(&memb->list, &ls->ls_nodes_gone);
77 ls->ls_num_nodes--;
78}
79
David Teigland46b43ee2008-01-08 16:24:00 -060080int dlm_is_member(struct dlm_ls *ls, int nodeid)
David Teiglande7fd4172006-01-18 09:30:29 +000081{
82 struct dlm_member *memb;
83
84 list_for_each_entry(memb, &ls->ls_nodes, list) {
85 if (memb->nodeid == nodeid)
David Teigland90135922006-01-20 08:47:07 +000086 return 1;
David Teiglande7fd4172006-01-18 09:30:29 +000087 }
David Teigland90135922006-01-20 08:47:07 +000088 return 0;
David Teiglande7fd4172006-01-18 09:30:29 +000089}
90
91int dlm_is_removed(struct dlm_ls *ls, int nodeid)
92{
93 struct dlm_member *memb;
94
95 list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
96 if (memb->nodeid == nodeid)
David Teigland90135922006-01-20 08:47:07 +000097 return 1;
David Teiglande7fd4172006-01-18 09:30:29 +000098 }
David Teigland90135922006-01-20 08:47:07 +000099 return 0;
David Teiglande7fd4172006-01-18 09:30:29 +0000100}
101
102static void clear_memb_list(struct list_head *head)
103{
104 struct dlm_member *memb;
105
106 while (!list_empty(head)) {
107 memb = list_entry(head->next, struct dlm_member, list);
108 list_del(&memb->list);
109 kfree(memb);
110 }
111}
112
113void dlm_clear_members(struct dlm_ls *ls)
114{
115 clear_memb_list(&ls->ls_nodes);
116 ls->ls_num_nodes = 0;
117}
118
119void dlm_clear_members_gone(struct dlm_ls *ls)
120{
121 clear_memb_list(&ls->ls_nodes_gone);
122}
123
124static void make_member_array(struct dlm_ls *ls)
125{
126 struct dlm_member *memb;
127 int i, w, x = 0, total = 0, all_zero = 0, *array;
128
129 kfree(ls->ls_node_array);
130 ls->ls_node_array = NULL;
131
132 list_for_each_entry(memb, &ls->ls_nodes, list) {
133 if (memb->weight)
134 total += memb->weight;
135 }
136
137 /* all nodes revert to weight of 1 if all have weight 0 */
138
139 if (!total) {
140 total = ls->ls_num_nodes;
141 all_zero = 1;
142 }
143
144 ls->ls_total_weight = total;
145
146 array = kmalloc(sizeof(int) * total, GFP_KERNEL);
147 if (!array)
148 return;
149
150 list_for_each_entry(memb, &ls->ls_nodes, list) {
151 if (!all_zero && !memb->weight)
152 continue;
153
154 if (all_zero)
155 w = 1;
156 else
157 w = memb->weight;
158
159 DLM_ASSERT(x < total, printk("total %d x %d\n", total, x););
160
161 for (i = 0; i < w; i++)
162 array[x++] = memb->nodeid;
163 }
164
165 ls->ls_node_array = array;
166}
167
168/* send a status request to all members just to establish comms connections */
169
David Teiglandf6db1b82006-08-08 17:06:07 -0500170static int ping_members(struct dlm_ls *ls)
David Teiglande7fd4172006-01-18 09:30:29 +0000171{
172 struct dlm_member *memb;
David Teiglandf6db1b82006-08-08 17:06:07 -0500173 int error = 0;
174
175 list_for_each_entry(memb, &ls->ls_nodes, list) {
176 error = dlm_recovery_stopped(ls);
177 if (error)
178 break;
179 error = dlm_rcom_status(ls, memb->nodeid);
180 if (error)
181 break;
182 }
183 if (error)
David Teiglandfaa0f262006-08-08 17:08:42 -0500184 log_debug(ls, "ping_members aborted %d last nodeid %d",
185 error, ls->ls_recover_nodeid);
David Teiglandf6db1b82006-08-08 17:06:07 -0500186 return error;
David Teiglande7fd4172006-01-18 09:30:29 +0000187}
188
189int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
190{
191 struct dlm_member *memb, *safe;
192 int i, error, found, pos = 0, neg = 0, low = -1;
193
David Teigland91c0dc92006-10-31 11:56:01 -0600194 /* previously removed members that we've not finished removing need to
195 count as a negative change so the "neg" recovery steps will happen */
196
197 list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
198 log_debug(ls, "prev removed member %d", memb->nodeid);
199 neg++;
200 }
201
David Teiglande7fd4172006-01-18 09:30:29 +0000202 /* move departed members from ls_nodes to ls_nodes_gone */
203
204 list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
David Teigland90135922006-01-20 08:47:07 +0000205 found = 0;
David Teiglande7fd4172006-01-18 09:30:29 +0000206 for (i = 0; i < rv->node_count; i++) {
207 if (memb->nodeid == rv->nodeids[i]) {
David Teigland90135922006-01-20 08:47:07 +0000208 found = 1;
David Teiglande7fd4172006-01-18 09:30:29 +0000209 break;
210 }
211 }
212
213 if (!found) {
214 neg++;
215 dlm_remove_member(ls, memb);
216 log_debug(ls, "remove member %d", memb->nodeid);
217 }
218 }
219
David Teiglandd44e0fc2008-03-18 14:22:11 -0500220 /* Add an entry to ls_nodes_gone for members that were removed and
221 then added again, so that previous state for these nodes will be
222 cleared during recovery. */
223
224 for (i = 0; i < rv->new_count; i++) {
225 if (!dlm_is_member(ls, rv->new[i]))
226 continue;
227 log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
228
229 memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
230 if (!memb)
231 return -ENOMEM;
232 memb->nodeid = rv->new[i];
233 list_add_tail(&memb->list, &ls->ls_nodes_gone);
234 neg++;
235 }
236
David Teiglande7fd4172006-01-18 09:30:29 +0000237 /* add new members to ls_nodes */
238
239 for (i = 0; i < rv->node_count; i++) {
240 if (dlm_is_member(ls, rv->nodeids[i]))
241 continue;
242 dlm_add_member(ls, rv->nodeids[i]);
243 pos++;
244 log_debug(ls, "add member %d", rv->nodeids[i]);
245 }
246
247 list_for_each_entry(memb, &ls->ls_nodes, list) {
248 if (low == -1 || memb->nodeid < low)
249 low = memb->nodeid;
250 }
251 ls->ls_low_nodeid = low;
252
253 make_member_array(ls);
254 dlm_set_recover_status(ls, DLM_RS_NODES);
255 *neg_out = neg;
256
David Teiglandf6db1b82006-08-08 17:06:07 -0500257 error = ping_members(ls);
David Teigland8b0e7b22007-05-18 09:03:35 -0500258 if (!error || error == -EPROTO) {
259 /* new_lockspace() may be waiting to know if the config
260 is good or bad */
261 ls->ls_members_result = error;
262 complete(&ls->ls_members_done);
263 }
David Teiglandf6db1b82006-08-08 17:06:07 -0500264 if (error)
265 goto out;
David Teiglande7fd4172006-01-18 09:30:29 +0000266
267 error = dlm_recover_members_wait(ls);
David Teiglandf6db1b82006-08-08 17:06:07 -0500268 out:
269 log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error);
David Teiglande7fd4172006-01-18 09:30:29 +0000270 return error;
271}
272
David Teiglandc36258b2007-09-27 15:53:38 -0500273/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
274 dlm_ls_start() is called on any of them to start the new recovery. */
David Teiglande7fd4172006-01-18 09:30:29 +0000275
276int dlm_ls_stop(struct dlm_ls *ls)
277{
278 int new;
279
280 /*
David Teiglandc36258b2007-09-27 15:53:38 -0500281 * Prevent dlm_recv from being in the middle of something when we do
282 * the stop. This includes ensuring dlm_recv isn't processing a
283 * recovery message (rcom), while dlm_recoverd is aborting and
284 * resetting things from an in-progress recovery. i.e. we want
285 * dlm_recoverd to abort its recovery without worrying about dlm_recv
286 * processing an rcom at the same time. Stopping dlm_recv also makes
287 * it easy for dlm_receive_message() to check locking stopped and add a
288 * message to the requestqueue without races.
289 */
290
291 down_write(&ls->ls_recv_active);
292
293 /*
294 * Abort any recovery that's in progress (see RECOVERY_STOP,
295 * dlm_recovery_stopped()) and tell any other threads running in the
296 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
David Teiglande7fd4172006-01-18 09:30:29 +0000297 */
298
299 spin_lock(&ls->ls_recover_lock);
300 set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
301 new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
302 ls->ls_recover_seq++;
303 spin_unlock(&ls->ls_recover_lock);
304
305 /*
David Teiglandc36258b2007-09-27 15:53:38 -0500306 * Let dlm_recv run again, now any normal messages will be saved on the
307 * requestqueue for later.
308 */
309
310 up_write(&ls->ls_recv_active);
311
312 /*
David Teiglande7fd4172006-01-18 09:30:29 +0000313 * This in_recovery lock does two things:
David Teiglande7fd4172006-01-18 09:30:29 +0000314 * 1) Keeps this function from returning until all threads are out
315 * of locking routines and locking is truely stopped.
316 * 2) Keeps any new requests from being processed until it's unlocked
317 * when recovery is complete.
318 */
319
320 if (new)
321 down_write(&ls->ls_in_recovery);
322
323 /*
324 * The recoverd suspend/resume makes sure that dlm_recoverd (if
David Teiglandc36258b2007-09-27 15:53:38 -0500325 * running) has noticed RECOVERY_STOP above and quit processing the
326 * previous recovery.
David Teiglande7fd4172006-01-18 09:30:29 +0000327 */
328
329 dlm_recoverd_suspend(ls);
330 ls->ls_recover_status = 0;
331 dlm_recoverd_resume(ls);
David Teigland3ae1acf2007-05-18 08:59:31 -0500332
333 if (!ls->ls_recover_begin)
334 ls->ls_recover_begin = jiffies;
David Teiglande7fd4172006-01-18 09:30:29 +0000335 return 0;
336}
337
338int dlm_ls_start(struct dlm_ls *ls)
339{
340 struct dlm_recover *rv = NULL, *rv_old;
David Teiglandd44e0fc2008-03-18 14:22:11 -0500341 int *ids = NULL, *new = NULL;
342 int error, ids_count = 0, new_count = 0;
David Teiglande7fd4172006-01-18 09:30:29 +0000343
David Teigland90135922006-01-20 08:47:07 +0000344 rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
David Teiglande7fd4172006-01-18 09:30:29 +0000345 if (!rv)
346 return -ENOMEM;
David Teiglande7fd4172006-01-18 09:30:29 +0000347
David Teiglandd44e0fc2008-03-18 14:22:11 -0500348 error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count,
349 &new, &new_count);
350 if (error < 0)
David Teiglande7fd4172006-01-18 09:30:29 +0000351 goto fail;
352
353 spin_lock(&ls->ls_recover_lock);
354
355 /* the lockspace needs to be stopped before it can be started */
356
357 if (!dlm_locking_stopped(ls)) {
358 spin_unlock(&ls->ls_recover_lock);
359 log_error(ls, "start ignored: lockspace running");
360 error = -EINVAL;
361 goto fail;
362 }
363
364 rv->nodeids = ids;
David Teiglandd44e0fc2008-03-18 14:22:11 -0500365 rv->node_count = ids_count;
366 rv->new = new;
367 rv->new_count = new_count;
David Teiglande7fd4172006-01-18 09:30:29 +0000368 rv->seq = ++ls->ls_recover_seq;
369 rv_old = ls->ls_recover_args;
370 ls->ls_recover_args = rv;
371 spin_unlock(&ls->ls_recover_lock);
372
373 if (rv_old) {
David Teiglandd44e0fc2008-03-18 14:22:11 -0500374 log_error(ls, "unused recovery %llx %d",
375 (unsigned long long)rv_old->seq, rv_old->node_count);
David Teiglande7fd4172006-01-18 09:30:29 +0000376 kfree(rv_old->nodeids);
David Teiglandd44e0fc2008-03-18 14:22:11 -0500377 kfree(rv_old->new);
David Teiglande7fd4172006-01-18 09:30:29 +0000378 kfree(rv_old);
379 }
380
381 dlm_recoverd_kick(ls);
382 return 0;
383
384 fail:
385 kfree(rv);
386 kfree(ids);
David Teiglandd44e0fc2008-03-18 14:22:11 -0500387 kfree(new);
David Teiglande7fd4172006-01-18 09:30:29 +0000388 return error;
389}
390