Thomas Gleixner | 2522fe4 | 2019-05-28 09:57:20 -0700 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 2 | /****************************************************************************** |
| 3 | ******************************************************************************* |
| 4 | ** |
| 5 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 6 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. |
| 7 | ** |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 8 | ** |
| 9 | ******************************************************************************* |
| 10 | ******************************************************************************/ |
| 11 | |
| 12 | #include "dlm_internal.h" |
| 13 | #include "lockspace.h" |
| 14 | #include "member.h" |
| 15 | #include "lowcomms.h" |
| 16 | #include "rcom.h" |
| 17 | #include "config.h" |
| 18 | #include "memory.h" |
| 19 | #include "recover.h" |
| 20 | #include "util.h" |
| 21 | #include "lock.h" |
| 22 | #include "dir.h" |
| 23 | |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 24 | /* |
| 25 | * We use the upper 16 bits of the hash value to select the directory node. |
| 26 | * Low bits are used for distribution of rsb's among hash buckets on each node. |
| 27 | * |
| 28 | * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of |
| 29 | * num_nodes to the hash value. This value in the desired range is used as an |
| 30 | * offset into the sorted list of nodeid's to give the particular nodeid. |
| 31 | */ |
| 32 | |
| 33 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) |
| 34 | { |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 35 | uint32_t node; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 36 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 37 | if (ls->ls_num_nodes == 1) |
| 38 | return dlm_our_nodeid(); |
| 39 | else { |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 40 | node = (hash >> 16) % ls->ls_total_weight; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 41 | return ls->ls_node_array[node]; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 42 | } |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 43 | } |
| 44 | |
| 45 | int dlm_dir_nodeid(struct dlm_rsb *r) |
| 46 | { |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 47 | return r->res_dir_nodeid; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 48 | } |
| 49 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 50 | void dlm_recover_dir_nodeid(struct dlm_ls *ls) |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 51 | { |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 52 | struct dlm_rsb *r; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 53 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 54 | down_read(&ls->ls_root_sem); |
| 55 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
| 56 | r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 57 | } |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 58 | up_read(&ls->ls_root_sem); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 59 | } |
| 60 | |
| 61 | int dlm_recover_directory(struct dlm_ls *ls) |
| 62 | { |
| 63 | struct dlm_member *memb; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 64 | char *b, *last_name = NULL; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 65 | int error = -ENOMEM, last_len, nodeid, result; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 66 | uint16_t namelen; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 67 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 68 | |
David Teigland | 075f017 | 2014-02-14 11:54:44 -0600 | [diff] [blame] | 69 | log_rinfo(ls, "dlm_recover_directory"); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 70 | |
| 71 | if (dlm_no_directory(ls)) |
| 72 | goto out_status; |
| 73 | |
David Teigland | 573c24c | 2009-11-30 16:34:43 -0600 | [diff] [blame] | 74 | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 75 | if (!last_name) |
| 76 | goto out; |
| 77 | |
| 78 | list_for_each_entry(memb, &ls->ls_nodes, list) { |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 79 | if (memb->nodeid == dlm_our_nodeid()) |
| 80 | continue; |
| 81 | |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 82 | memset(last_name, 0, DLM_RESNAME_MAXLEN); |
| 83 | last_len = 0; |
| 84 | |
| 85 | for (;;) { |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 86 | int left; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 87 | error = dlm_recovery_stopped(ls); |
| 88 | if (error) |
| 89 | goto out_free; |
| 90 | |
| 91 | error = dlm_rcom_names(ls, memb->nodeid, |
| 92 | last_name, last_len); |
| 93 | if (error) |
| 94 | goto out_free; |
| 95 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 96 | cond_resched(); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 97 | |
| 98 | /* |
| 99 | * pick namelen/name pairs out of received buffer |
| 100 | */ |
| 101 | |
Al Viro | 4007685 | 2008-01-25 03:01:51 -0500 | [diff] [blame] | 102 | b = ls->ls_recover_buf->rc_buf; |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 103 | left = ls->ls_recover_buf->rc_header.h_length; |
| 104 | left -= sizeof(struct dlm_rcom); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 105 | |
| 106 | for (;;) { |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 107 | __be16 v; |
| 108 | |
| 109 | error = -EINVAL; |
| 110 | if (left < sizeof(__be16)) |
| 111 | goto out_free; |
| 112 | |
| 113 | memcpy(&v, b, sizeof(__be16)); |
| 114 | namelen = be16_to_cpu(v); |
| 115 | b += sizeof(__be16); |
| 116 | left -= sizeof(__be16); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 117 | |
| 118 | /* namelen of 0xFFFFF marks end of names for |
| 119 | this node; namelen of 0 marks end of the |
| 120 | buffer */ |
| 121 | |
| 122 | if (namelen == 0xFFFF) |
| 123 | goto done; |
| 124 | if (!namelen) |
| 125 | break; |
| 126 | |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 127 | if (namelen > left) |
| 128 | goto out_free; |
| 129 | |
| 130 | if (namelen > DLM_RESNAME_MAXLEN) |
| 131 | goto out_free; |
| 132 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 133 | error = dlm_master_lookup(ls, memb->nodeid, |
| 134 | b, namelen, |
| 135 | DLM_LU_RECOVER_DIR, |
| 136 | &nodeid, &result); |
| 137 | if (error) { |
| 138 | log_error(ls, "recover_dir lookup %d", |
| 139 | error); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 140 | goto out_free; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 141 | } |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 142 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 143 | /* The name was found in rsbtbl, but the |
| 144 | * master nodeid is different from |
| 145 | * memb->nodeid which says it is the master. |
| 146 | * This should not happen. */ |
| 147 | |
| 148 | if (result == DLM_LU_MATCH && |
| 149 | nodeid != memb->nodeid) { |
| 150 | count_bad++; |
| 151 | log_error(ls, "recover_dir lookup %d " |
| 152 | "nodeid %d memb %d bad %u", |
| 153 | result, nodeid, memb->nodeid, |
| 154 | count_bad); |
| 155 | print_hex_dump_bytes("dlm_recover_dir ", |
| 156 | DUMP_PREFIX_NONE, |
| 157 | b, namelen); |
| 158 | } |
| 159 | |
| 160 | /* The name was found in rsbtbl, and the |
| 161 | * master nodeid matches memb->nodeid. */ |
| 162 | |
| 163 | if (result == DLM_LU_MATCH && |
| 164 | nodeid == memb->nodeid) { |
| 165 | count_match++; |
| 166 | } |
| 167 | |
| 168 | /* The name was not found in rsbtbl and was |
| 169 | * added with memb->nodeid as the master. */ |
| 170 | |
| 171 | if (result == DLM_LU_ADD) { |
| 172 | count_add++; |
| 173 | } |
| 174 | |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 175 | last_len = namelen; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 176 | memcpy(last_name, b, namelen); |
| 177 | b += namelen; |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 178 | left -= namelen; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 179 | count++; |
| 180 | } |
| 181 | } |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 182 | done: |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 183 | ; |
| 184 | } |
| 185 | |
| 186 | out_status: |
| 187 | error = 0; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 188 | dlm_set_recover_status(ls, DLM_RS_DIR); |
| 189 | |
David Teigland | 075f017 | 2014-02-14 11:54:44 -0600 | [diff] [blame] | 190 | log_rinfo(ls, "dlm_recover_directory %u in %u new", |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 191 | count, count_add); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 192 | out_free: |
| 193 | kfree(last_name); |
| 194 | out: |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 195 | return error; |
| 196 | } |
| 197 | |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 198 | static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) |
| 199 | { |
| 200 | struct dlm_rsb *r; |
David Teigland | 7210cb7 | 2012-03-08 12:37:12 -0600 | [diff] [blame] | 201 | uint32_t hash, bucket; |
| 202 | int rv; |
| 203 | |
| 204 | hash = jhash(name, len, 0); |
| 205 | bucket = hash & (ls->ls_rsbtbl_size - 1); |
| 206 | |
| 207 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 208 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); |
David Teigland | 7210cb7 | 2012-03-08 12:37:12 -0600 | [diff] [blame] | 209 | if (rv) |
| 210 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 211 | name, len, &r); |
David Teigland | 7210cb7 | 2012-03-08 12:37:12 -0600 | [diff] [blame] | 212 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
| 213 | |
| 214 | if (!rv) |
| 215 | return r; |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 216 | |
| 217 | down_read(&ls->ls_root_sem); |
| 218 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
| 219 | if (len == r->res_length && !memcmp(name, r->res_name, len)) { |
| 220 | up_read(&ls->ls_root_sem); |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 221 | log_debug(ls, "find_rsb_root revert to root_list %s", |
David Teigland | 7210cb7 | 2012-03-08 12:37:12 -0600 | [diff] [blame] | 222 | r->res_name); |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 223 | return r; |
| 224 | } |
| 225 | } |
| 226 | up_read(&ls->ls_root_sem); |
| 227 | return NULL; |
| 228 | } |
| 229 | |
| 230 | /* Find the rsb where we left off (or start again), then send rsb names |
| 231 | for rsb's we're master of and whose directory node matches the requesting |
| 232 | node. inbuf is the rsb name last sent, inlen is the name's length */ |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 233 | |
| 234 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, |
| 235 | char *outbuf, int outlen, int nodeid) |
| 236 | { |
| 237 | struct list_head *list; |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 238 | struct dlm_rsb *r; |
| 239 | int offset = 0, dir_nodeid; |
Harvey Harrison | cd8e467 | 2008-11-12 16:28:43 -0600 | [diff] [blame] | 240 | __be16 be_namelen; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 241 | |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 242 | down_read(&ls->ls_root_sem); |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 243 | |
| 244 | if (inlen > 1) { |
| 245 | r = find_rsb_root(ls, inbuf, inlen); |
| 246 | if (!r) { |
| 247 | inbuf[inlen - 1] = '\0'; |
| 248 | log_error(ls, "copy_master_names from %d start %d %s", |
| 249 | nodeid, inlen, inbuf); |
| 250 | goto out; |
| 251 | } |
| 252 | list = r->res_root_list.next; |
| 253 | } else { |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 254 | list = ls->ls_root_list.next; |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 255 | } |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 256 | |
| 257 | for (offset = 0; list != &ls->ls_root_list; list = list->next) { |
| 258 | r = list_entry(list, struct dlm_rsb, res_root_list); |
| 259 | if (r->res_nodeid) |
| 260 | continue; |
| 261 | |
| 262 | dir_nodeid = dlm_dir_nodeid(r); |
| 263 | if (dir_nodeid != nodeid) |
| 264 | continue; |
| 265 | |
| 266 | /* |
| 267 | * The block ends when we can't fit the following in the |
| 268 | * remaining buffer space: |
| 269 | * namelen (uint16_t) + |
| 270 | * name (r->res_length) + |
| 271 | * end-of-block record 0x0000 (uint16_t) |
| 272 | */ |
| 273 | |
| 274 | if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { |
| 275 | /* Write end-of-block record */ |
Harvey Harrison | cd8e467 | 2008-11-12 16:28:43 -0600 | [diff] [blame] | 276 | be_namelen = cpu_to_be16(0); |
| 277 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
| 278 | offset += sizeof(__be16); |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 279 | ls->ls_recover_dir_sent_msg++; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 280 | goto out; |
| 281 | } |
| 282 | |
| 283 | be_namelen = cpu_to_be16(r->res_length); |
Harvey Harrison | cd8e467 | 2008-11-12 16:28:43 -0600 | [diff] [blame] | 284 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
| 285 | offset += sizeof(__be16); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 286 | memcpy(outbuf + offset, r->res_name, r->res_length); |
| 287 | offset += r->res_length; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 288 | ls->ls_recover_dir_sent_res++; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 289 | } |
| 290 | |
| 291 | /* |
| 292 | * If we've reached the end of the list (and there's room) write a |
| 293 | * terminating record. |
| 294 | */ |
| 295 | |
| 296 | if ((list == &ls->ls_root_list) && |
| 297 | (offset + sizeof(uint16_t) <= outlen)) { |
Harvey Harrison | cd8e467 | 2008-11-12 16:28:43 -0600 | [diff] [blame] | 298 | be_namelen = cpu_to_be16(0xFFFF); |
| 299 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
| 300 | offset += sizeof(__be16); |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 301 | ls->ls_recover_dir_sent_msg++; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 302 | } |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 303 | out: |
| 304 | up_read(&ls->ls_root_sem); |
| 305 | } |
| 306 | |