Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 2 | #include <linux/ceph/ceph_debug.h> |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 3 | |
| 4 | #include <linux/bug.h> |
| 5 | #include <linux/err.h> |
| 6 | #include <linux/random.h> |
| 7 | #include <linux/slab.h> |
| 8 | #include <linux/types.h> |
| 9 | |
Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 10 | #include <linux/ceph/mdsmap.h> |
| 11 | #include <linux/ceph/messenger.h> |
| 12 | #include <linux/ceph/decode.h> |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 13 | |
| 14 | #include "super.h" |
| 15 | |
/*
 * Evaluates to non-zero when rank @i of the mdsmap 'm' (picked up from the
 * caller's scope) is up, i.e. has state > 0, and is either not flagged
 * laggy or @ignore_laggy is set.
 *
 * The laggy test is grouped explicitly: "a && b ? c : d" parses as
 * "(a && b) ? c : d", which would accept a rank with state <= 0 whenever
 * @ignore_laggy is false.
 */
#define CEPH_MDS_IS_READY(i, ignore_laggy) \
	(m->m_info[(i)].state > 0 && \
	 ((ignore_laggy) || !m->m_info[(i)].laggy))
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 18 | |
Xiubo Li | 5d47648 | 2019-11-26 07:24:22 -0500 | [diff] [blame] | 19 | static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 20 | { |
| 21 | int n = 0; |
Xiubo Li | 74d6f03 | 2019-11-11 06:51:05 -0500 | [diff] [blame] | 22 | int i, j; |
Sam Lang | a84cd29 | 2013-04-09 16:49:11 -0500 | [diff] [blame] | 23 | |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 24 | /* count */ |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 25 | for (i = 0; i < m->possible_max_rank; i++) |
Xiubo Li | 5d47648 | 2019-11-26 07:24:22 -0500 | [diff] [blame] | 26 | if (CEPH_MDS_IS_READY(i, ignore_laggy)) |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 27 | n++; |
| 28 | if (n == 0) |
| 29 | return -1; |
| 30 | |
| 31 | /* pick */ |
Sam Lang | a84cd29 | 2013-04-09 16:49:11 -0500 | [diff] [blame] | 32 | n = prandom_u32() % n; |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 33 | for (j = 0, i = 0; i < m->possible_max_rank; i++) { |
Xiubo Li | 5d47648 | 2019-11-26 07:24:22 -0500 | [diff] [blame] | 34 | if (CEPH_MDS_IS_READY(i, ignore_laggy)) |
Xiubo Li | 74d6f03 | 2019-11-11 06:51:05 -0500 | [diff] [blame] | 35 | j++; |
| 36 | if (j > n) |
| 37 | break; |
| 38 | } |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 39 | |
| 40 | return i; |
| 41 | } |
| 42 | |
Xiubo Li | 5d47648 | 2019-11-26 07:24:22 -0500 | [diff] [blame] | 43 | /* |
| 44 | * choose a random mds that is "up" (i.e. has a state > 0), or -1. |
| 45 | */ |
| 46 | int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) |
| 47 | { |
| 48 | int mds; |
| 49 | |
| 50 | mds = __mdsmap_get_random_mds(m, false); |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 51 | if (mds == m->possible_max_rank || mds == -1) |
Xiubo Li | 5d47648 | 2019-11-26 07:24:22 -0500 | [diff] [blame] | 52 | mds = __mdsmap_get_random_mds(m, true); |
| 53 | |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 54 | return mds == m->possible_max_rank ? -1 : mds; |
Xiubo Li | 5d47648 | 2019-11-26 07:24:22 -0500 | [diff] [blame] | 55 | } |
| 56 | |
/*
 * Skip one encoded value of @type without decoding it.
 *
 * @p is the decode cursor (pointer to the current position), @end the end
 * of the buffer.  Jumps to label @bad when fewer than sizeof(@type) bytes
 * remain.  The room check goes through ceph_decode_need(), like the set/map
 * helpers below, instead of an open-coded "*p + size > end" comparison.
 */
#define __decode_and_drop_type(p, end, type, bad)		\
	do {							\
		ceph_decode_need(p, end, sizeof(type), bad);	\
		*(p) += sizeof(type);				\
	} while (0)
| 63 | |
/*
 * Skip an encoded set<@type>: a u32 element count followed by that many
 * fixed-size elements.
 *
 * @p is the decode cursor, @end the end of the buffer.  Jumps to label
 * @bad when the count or the elements do not fit in the buffer.  The
 * byte length is checked for overflow before the multiply so that a huge
 * count cannot wrap to a small length where size_t is 32 bits wide.
 * The locals use names that cannot shadow a caller's 'n'.
 */
#define __decode_and_drop_set(p, end, type, bad)			\
	do {								\
		u32 drop_cnt;						\
		size_t drop_len;					\
		ceph_decode_32_safe(p, end, drop_cnt, bad);		\
		if (drop_cnt > SIZE_MAX / sizeof(type))			\
			goto bad;					\
		drop_len = sizeof(type) * drop_cnt;			\
		ceph_decode_need(p, end, drop_len, bad);		\
		*(p) += drop_len;					\
	} while (0)
| 73 | |
/*
 * Skip an encoded map<@ktype, @vtype>: a u32 entry count followed by that
 * many fixed-size (key, value) pairs.
 *
 * @p is the decode cursor, @end the end of the buffer.  Jumps to label
 * @bad when the count or the entries do not fit in the buffer.  The
 * byte length is checked for overflow before the multiply so that a huge
 * count cannot wrap to a small length where size_t is 32 bits wide.
 * The locals use names that cannot shadow a caller's 'n'.
 */
#define __decode_and_drop_map(p, end, ktype, vtype, bad)		\
	do {								\
		u32 drop_cnt;						\
		size_t drop_len;					\
		ceph_decode_32_safe(p, end, drop_cnt, bad);		\
		if (drop_cnt > SIZE_MAX /				\
			       (sizeof(ktype) + sizeof(vtype)))		\
			goto bad;					\
		drop_len = (sizeof(ktype) + sizeof(vtype)) * drop_cnt;	\
		ceph_decode_need(p, end, drop_len, bad);		\
		*(p) += drop_len;					\
	} while (0)
| 83 | |
| 84 | |
| 85 | static int __decode_and_drop_compat_set(void **p, void* end) |
| 86 | { |
| 87 | int i; |
| 88 | /* compat, ro_compat, incompat*/ |
| 89 | for (i = 0; i < 3; i++) { |
| 90 | u32 n; |
| 91 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); |
| 92 | /* mask */ |
| 93 | *p += sizeof(u64); |
| 94 | /* names (map<u64, string>) */ |
| 95 | n = ceph_decode_32(p); |
| 96 | while (n-- > 0) { |
| 97 | u32 len; |
| 98 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), |
| 99 | bad); |
| 100 | *p += sizeof(u64); |
| 101 | len = ceph_decode_32(p); |
| 102 | ceph_decode_need(p, end, len, bad); |
| 103 | *p += len; |
| 104 | } |
| 105 | } |
| 106 | return 0; |
| 107 | bad: |
| 108 | return -1; |
| 109 | } |
| 110 | |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 111 | /* |
| 112 | * Decode an MDS map |
| 113 | * |
| 114 | * Ignore any fields we don't care about (there are quite a few of |
| 115 | * them). |
| 116 | */ |
| 117 | struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) |
| 118 | { |
| 119 | struct ceph_mdsmap *m; |
Sage Weil | 9ec7cab | 2009-12-14 15:13:47 -0800 | [diff] [blame] | 120 | const void *start = *p; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 121 | int i, j, n; |
Jeff Layton | f3848af | 2019-06-04 11:26:36 -0400 | [diff] [blame] | 122 | int err; |
Yan, Zheng | d463a43 | 2016-03-31 15:53:01 +0800 | [diff] [blame] | 123 | u8 mdsmap_v, mdsmap_cv; |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 124 | u16 mdsmap_ev; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 125 | |
| 126 | m = kzalloc(sizeof(*m), GFP_NOFS); |
Markus Elfring | d37b1d9 | 2017-08-20 20:22:02 +0200 | [diff] [blame] | 127 | if (!m) |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 128 | return ERR_PTR(-ENOMEM); |
| 129 | |
Yan, Zheng | d463a43 | 2016-03-31 15:53:01 +0800 | [diff] [blame] | 130 | ceph_decode_need(p, end, 1 + 1, bad); |
| 131 | mdsmap_v = ceph_decode_8(p); |
| 132 | mdsmap_cv = ceph_decode_8(p); |
| 133 | if (mdsmap_v >= 4) { |
| 134 | u32 mdsmap_len; |
| 135 | ceph_decode_32_safe(p, end, mdsmap_len, bad); |
| 136 | if (end < *p + mdsmap_len) |
| 137 | goto bad; |
| 138 | end = *p + mdsmap_len; |
Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 139 | } |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 140 | |
| 141 | ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); |
Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 142 | m->m_epoch = ceph_decode_32(p); |
| 143 | m->m_client_epoch = ceph_decode_32(p); |
| 144 | m->m_last_failure = ceph_decode_32(p); |
| 145 | m->m_root = ceph_decode_32(p); |
| 146 | m->m_session_timeout = ceph_decode_32(p); |
| 147 | m->m_session_autoclose = ceph_decode_32(p); |
| 148 | m->m_max_file_size = ceph_decode_64(p); |
| 149 | m->m_max_mds = ceph_decode_32(p); |
Xiubo Li | 4d7ace0 | 2019-11-26 07:24:21 -0500 | [diff] [blame] | 150 | |
| 151 | /* |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 152 | * pick out the active nodes as the m_num_active_mds, the |
| 153 | * m_num_active_mds maybe larger than m_max_mds when decreasing |
| 154 | * the max_mds in cluster side, in other case it should less |
| 155 | * than or equal to m_max_mds. |
Xiubo Li | 4d7ace0 | 2019-11-26 07:24:21 -0500 | [diff] [blame] | 156 | */ |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 157 | m->m_num_active_mds = n = ceph_decode_32(p); |
Xiubo Li | 4d7ace0 | 2019-11-26 07:24:21 -0500 | [diff] [blame] | 158 | |
| 159 | /* |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 160 | * the possible max rank, it maybe larger than the m_num_active_mds, |
Xiubo Li | 4d7ace0 | 2019-11-26 07:24:21 -0500 | [diff] [blame] | 161 | * for example if the mds_max == 2 in the cluster, when the MDS(0) |
| 162 | * was laggy and being replaced by a new MDS, we will temporarily |
| 163 | * receive a new mds map with n_num_mds == 1 and the active MDS(1), |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 164 | * and the mds rank >= m_num_active_mds. |
Xiubo Li | 4d7ace0 | 2019-11-26 07:24:21 -0500 | [diff] [blame] | 165 | */ |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 166 | m->possible_max_rank = max(m->m_num_active_mds, m->m_max_mds); |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 167 | |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 168 | m->m_info = kcalloc(m->possible_max_rank, sizeof(*m->m_info), GFP_NOFS); |
Markus Elfring | d37b1d9 | 2017-08-20 20:22:02 +0200 | [diff] [blame] | 169 | if (!m->m_info) |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 170 | goto nomem; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 171 | |
| 172 | /* pick out active nodes from mds_info (state > 0) */ |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 173 | for (i = 0; i < n; i++) { |
Sage Weil | 94045e1 | 2009-11-19 15:31:50 -0800 | [diff] [blame] | 174 | u64 global_id; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 175 | u32 namelen; |
| 176 | s32 mds, inc, state; |
| 177 | u64 state_seq; |
Yan, Zheng | d463a43 | 2016-03-31 15:53:01 +0800 | [diff] [blame] | 178 | u8 info_v; |
| 179 | void *info_end = NULL; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 180 | struct ceph_entity_addr addr; |
| 181 | u32 num_export_targets; |
| 182 | void *pexport_targets = NULL; |
Sage Weil | 0deb01c | 2010-06-17 14:19:01 -0700 | [diff] [blame] | 183 | struct ceph_timespec laggy_since; |
Dan Carpenter | 6af8652 | 2013-05-29 06:46:56 -0500 | [diff] [blame] | 184 | struct ceph_mds_info *info; |
Xiubo Li | da08e1e | 2019-11-26 07:24:20 -0500 | [diff] [blame] | 185 | bool laggy; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 186 | |
Yan, Zheng | d463a43 | 2016-03-31 15:53:01 +0800 | [diff] [blame] | 187 | ceph_decode_need(p, end, sizeof(u64) + 1, bad); |
Sage Weil | 94045e1 | 2009-11-19 15:31:50 -0800 | [diff] [blame] | 188 | global_id = ceph_decode_64(p); |
Yan, Zheng | d463a43 | 2016-03-31 15:53:01 +0800 | [diff] [blame] | 189 | info_v= ceph_decode_8(p); |
| 190 | if (info_v >= 4) { |
| 191 | u32 info_len; |
| 192 | u8 info_cv; |
| 193 | ceph_decode_need(p, end, 1 + sizeof(u32), bad); |
| 194 | info_cv = ceph_decode_8(p); |
| 195 | info_len = ceph_decode_32(p); |
| 196 | info_end = *p + info_len; |
| 197 | if (info_end > end) |
| 198 | goto bad; |
| 199 | } |
| 200 | |
| 201 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); |
Sage Weil | 94045e1 | 2009-11-19 15:31:50 -0800 | [diff] [blame] | 202 | *p += sizeof(u64); |
Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 203 | namelen = ceph_decode_32(p); /* skip mds name */ |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 204 | *p += namelen; |
| 205 | |
| 206 | ceph_decode_need(p, end, |
Sage Weil | e251e28 | 2009-10-07 16:38:19 -0700 | [diff] [blame] | 207 | 4*sizeof(u32) + sizeof(u64) + |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 208 | sizeof(addr) + sizeof(struct ceph_timespec), |
| 209 | bad); |
Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 210 | mds = ceph_decode_32(p); |
| 211 | inc = ceph_decode_32(p); |
| 212 | state = ceph_decode_32(p); |
| 213 | state_seq = ceph_decode_64(p); |
Jeff Layton | f3848af | 2019-06-04 11:26:36 -0400 | [diff] [blame] | 214 | err = ceph_decode_entity_addr(p, end, &addr); |
| 215 | if (err) |
| 216 | goto corrupt; |
Sage Weil | 0deb01c | 2010-06-17 14:19:01 -0700 | [diff] [blame] | 217 | ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); |
Xiubo Li | da08e1e | 2019-11-26 07:24:20 -0500 | [diff] [blame] | 218 | laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 219 | *p += sizeof(u32); |
| 220 | ceph_decode_32_safe(p, end, namelen, bad); |
Sage Weil | e251e28 | 2009-10-07 16:38:19 -0700 | [diff] [blame] | 221 | *p += namelen; |
Yan, Zheng | d463a43 | 2016-03-31 15:53:01 +0800 | [diff] [blame] | 222 | if (info_v >= 2) { |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 223 | ceph_decode_32_safe(p, end, num_export_targets, bad); |
| 224 | pexport_targets = *p; |
Sage Weil | e251e28 | 2009-10-07 16:38:19 -0700 | [diff] [blame] | 225 | *p += num_export_targets * sizeof(u32); |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 226 | } else { |
| 227 | num_export_targets = 0; |
| 228 | } |
| 229 | |
Yan, Zheng | d463a43 | 2016-03-31 15:53:01 +0800 | [diff] [blame] | 230 | if (info_end && *p != info_end) { |
| 231 | if (*p > info_end) |
| 232 | goto bad; |
| 233 | *p = info_end; |
| 234 | } |
| 235 | |
Xiubo Li | da08e1e | 2019-11-26 07:24:20 -0500 | [diff] [blame] | 236 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s%s\n", |
Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 237 | i+1, n, global_id, mds, inc, |
Jeff Layton | b726ec9 | 2019-05-06 09:38:47 -0400 | [diff] [blame] | 238 | ceph_pr_addr(&addr), |
Xiubo Li | da08e1e | 2019-11-26 07:24:20 -0500 | [diff] [blame] | 239 | ceph_mds_state_name(state), |
| 240 | laggy ? "(laggy)" : ""); |
Dan Carpenter | 6af8652 | 2013-05-29 06:46:56 -0500 | [diff] [blame] | 241 | |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 242 | if (mds < 0 || mds >= m->possible_max_rank) { |
Xiubo Li | 4d7ace0 | 2019-11-26 07:24:21 -0500 | [diff] [blame] | 243 | pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds); |
Dan Carpenter | 6af8652 | 2013-05-29 06:46:56 -0500 | [diff] [blame] | 244 | continue; |
Xiubo Li | 4d7ace0 | 2019-11-26 07:24:21 -0500 | [diff] [blame] | 245 | } |
Dan Carpenter | 6af8652 | 2013-05-29 06:46:56 -0500 | [diff] [blame] | 246 | |
Xiubo Li | 4d7ace0 | 2019-11-26 07:24:21 -0500 | [diff] [blame] | 247 | if (state <= 0) { |
| 248 | pr_warn("mdsmap_decode got incorrect state(%s)\n", |
| 249 | ceph_mds_state_name(state)); |
| 250 | continue; |
Yan, Zheng | 76201b6 | 2017-03-28 17:04:13 +0800 | [diff] [blame] | 251 | } |
| 252 | |
Dan Carpenter | 6af8652 | 2013-05-29 06:46:56 -0500 | [diff] [blame] | 253 | info = &m->m_info[mds]; |
| 254 | info->global_id = global_id; |
| 255 | info->state = state; |
| 256 | info->addr = addr; |
Xiubo Li | da08e1e | 2019-11-26 07:24:20 -0500 | [diff] [blame] | 257 | info->laggy = laggy; |
Dan Carpenter | 6af8652 | 2013-05-29 06:46:56 -0500 | [diff] [blame] | 258 | info->num_export_targets = num_export_targets; |
| 259 | if (num_export_targets) { |
| 260 | info->export_targets = kcalloc(num_export_targets, |
| 261 | sizeof(u32), GFP_NOFS); |
Markus Elfring | d37b1d9 | 2017-08-20 20:22:02 +0200 | [diff] [blame] | 262 | if (!info->export_targets) |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 263 | goto nomem; |
Dan Carpenter | 6af8652 | 2013-05-29 06:46:56 -0500 | [diff] [blame] | 264 | for (j = 0; j < num_export_targets; j++) |
| 265 | info->export_targets[j] = |
| 266 | ceph_decode_32(&pexport_targets); |
| 267 | } else { |
| 268 | info->export_targets = NULL; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 269 | } |
| 270 | } |
| 271 | |
| 272 | /* pg_pools */ |
| 273 | ceph_decode_32_safe(p, end, n, bad); |
| 274 | m->m_num_data_pg_pools = n; |
Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 275 | m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 276 | if (!m->m_data_pg_pools) |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 277 | goto nomem; |
Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 278 | ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 279 | for (i = 0; i < n; i++) |
Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 280 | m->m_data_pg_pools[i] = ceph_decode_64(p); |
| 281 | m->m_cas_pg_pool = ceph_decode_64(p); |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 282 | m->m_enabled = m->m_epoch > 1; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 283 | |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 284 | mdsmap_ev = 1; |
| 285 | if (mdsmap_v >= 2) { |
| 286 | ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext); |
| 287 | } |
| 288 | if (mdsmap_ev >= 3) { |
| 289 | if (__decode_and_drop_compat_set(p, end) < 0) |
| 290 | goto bad_ext; |
| 291 | } |
| 292 | /* metadata_pool */ |
| 293 | if (mdsmap_ev < 5) { |
| 294 | __decode_and_drop_type(p, end, u32, bad_ext); |
| 295 | } else { |
| 296 | __decode_and_drop_type(p, end, u64, bad_ext); |
| 297 | } |
| 298 | |
| 299 | /* created + modified + tableserver */ |
| 300 | __decode_and_drop_type(p, end, struct ceph_timespec, bad_ext); |
| 301 | __decode_and_drop_type(p, end, struct ceph_timespec, bad_ext); |
| 302 | __decode_and_drop_type(p, end, u32, bad_ext); |
| 303 | |
| 304 | /* in */ |
| 305 | { |
| 306 | int num_laggy = 0; |
| 307 | ceph_decode_32_safe(p, end, n, bad_ext); |
| 308 | ceph_decode_need(p, end, sizeof(u32) * n, bad_ext); |
| 309 | |
| 310 | for (i = 0; i < n; i++) { |
| 311 | s32 mds = ceph_decode_32(p); |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 312 | if (mds >= 0 && mds < m->possible_max_rank) { |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 313 | if (m->m_info[mds].laggy) |
| 314 | num_laggy++; |
| 315 | } |
| 316 | } |
| 317 | m->m_num_laggy = num_laggy; |
Yan, Zheng | 76201b6 | 2017-03-28 17:04:13 +0800 | [diff] [blame] | 318 | |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 319 | if (n > m->possible_max_rank) { |
Yan, Zheng | 76201b6 | 2017-03-28 17:04:13 +0800 | [diff] [blame] | 320 | void *new_m_info = krealloc(m->m_info, |
| 321 | n * sizeof(*m->m_info), |
| 322 | GFP_NOFS | __GFP_ZERO); |
| 323 | if (!new_m_info) |
| 324 | goto nomem; |
| 325 | m->m_info = new_m_info; |
| 326 | } |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 327 | m->possible_max_rank = n; |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 328 | } |
| 329 | |
| 330 | /* inc */ |
| 331 | __decode_and_drop_map(p, end, u32, u32, bad_ext); |
| 332 | /* up */ |
| 333 | __decode_and_drop_map(p, end, u32, u64, bad_ext); |
| 334 | /* failed */ |
| 335 | __decode_and_drop_set(p, end, u32, bad_ext); |
| 336 | /* stopped */ |
| 337 | __decode_and_drop_set(p, end, u32, bad_ext); |
| 338 | |
| 339 | if (mdsmap_ev >= 4) { |
| 340 | /* last_failure_osd_epoch */ |
| 341 | __decode_and_drop_type(p, end, u32, bad_ext); |
| 342 | } |
| 343 | if (mdsmap_ev >= 6) { |
| 344 | /* ever_allowed_snaps */ |
| 345 | __decode_and_drop_type(p, end, u8, bad_ext); |
| 346 | /* explicitly_allowed_snaps */ |
| 347 | __decode_and_drop_type(p, end, u8, bad_ext); |
| 348 | } |
| 349 | if (mdsmap_ev >= 7) { |
| 350 | /* inline_data_enabled */ |
| 351 | __decode_and_drop_type(p, end, u8, bad_ext); |
| 352 | } |
| 353 | if (mdsmap_ev >= 8) { |
| 354 | u32 name_len; |
| 355 | /* enabled */ |
| 356 | ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); |
| 357 | ceph_decode_32_safe(p, end, name_len, bad_ext); |
| 358 | ceph_decode_need(p, end, name_len, bad_ext); |
| 359 | *p += name_len; |
| 360 | } |
| 361 | /* damaged */ |
| 362 | if (mdsmap_ev >= 9) { |
| 363 | size_t need; |
| 364 | ceph_decode_32_safe(p, end, n, bad_ext); |
| 365 | need = sizeof(u32) * n; |
| 366 | ceph_decode_need(p, end, need, bad_ext); |
| 367 | *p += need; |
| 368 | m->m_damaged = n > 0; |
| 369 | } else { |
| 370 | m->m_damaged = false; |
| 371 | } |
| 372 | bad_ext: |
Xiubo Li | da08e1e | 2019-11-26 07:24:20 -0500 | [diff] [blame] | 373 | dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", |
| 374 | !!m->m_enabled, !!m->m_damaged, m->m_num_laggy); |
Yan, Zheng | d463a43 | 2016-03-31 15:53:01 +0800 | [diff] [blame] | 375 | *p = end; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 376 | dout("mdsmap_decode success epoch %u\n", m->m_epoch); |
| 377 | return m; |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 378 | nomem: |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 379 | err = -ENOMEM; |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 380 | goto out_err; |
Jeff Layton | f3848af | 2019-06-04 11:26:36 -0400 | [diff] [blame] | 381 | corrupt: |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 382 | pr_err("corrupt mdsmap\n"); |
Sage Weil | 9ec7cab | 2009-12-14 15:13:47 -0800 | [diff] [blame] | 383 | print_hex_dump(KERN_DEBUG, "mdsmap: ", |
| 384 | DUMP_PREFIX_OFFSET, 16, 1, |
| 385 | start, end - start, true); |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 386 | out_err: |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 387 | ceph_mdsmap_destroy(m); |
Emil Goode | c213b50 | 2013-05-28 16:59:00 +0200 | [diff] [blame] | 388 | return ERR_PTR(err); |
Jeff Layton | f3848af | 2019-06-04 11:26:36 -0400 | [diff] [blame] | 389 | bad: |
| 390 | err = -EINVAL; |
| 391 | goto corrupt; |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 392 | } |
| 393 | |
| 394 | void ceph_mdsmap_destroy(struct ceph_mdsmap *m) |
| 395 | { |
| 396 | int i; |
| 397 | |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 398 | for (i = 0; i < m->possible_max_rank; i++) |
Sage Weil | 2f2dc05 | 2009-10-06 11:31:09 -0700 | [diff] [blame] | 399 | kfree(m->m_info[i].export_targets); |
| 400 | kfree(m->m_info); |
| 401 | kfree(m->m_data_pg_pools); |
| 402 | kfree(m); |
| 403 | } |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 404 | |
| 405 | bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) |
| 406 | { |
| 407 | int i, nr_active = 0; |
| 408 | if (!m->m_enabled) |
| 409 | return false; |
| 410 | if (m->m_damaged) |
| 411 | return false; |
Xiubo Li | 4d7ace0 | 2019-11-26 07:24:21 -0500 | [diff] [blame] | 412 | if (m->m_num_laggy == m->m_num_active_mds) |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 413 | return false; |
Xiubo Li | b38c9eb | 2019-12-04 06:57:39 -0500 | [diff] [blame] | 414 | for (i = 0; i < m->possible_max_rank; i++) { |
Yan, Zheng | e9e427f | 2016-11-10 16:02:06 +0800 | [diff] [blame] | 415 | if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) |
| 416 | nr_active++; |
| 417 | } |
| 418 | return nr_active > 0; |
| 419 | } |