/*
2 * net/tipc/monitor.c
3 *
4 * Copyright (c) 2016, Ericsson AB
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include "core.h"
37#include "addr.h"
38#include "monitor.h"
39
/* Capacity of the members[] array in a domain record */
#define MAX_MON_DOMAIN		64
/* Base period of the monitor timer, in milliseconds */
#define MON_TIMEOUT		120000
/* down_cnt value at which tipc_mon_get_state() asks for a link reset */
#define MAX_PEER_DOWN_EVENTS	4
43
44/* struct tipc_mon_domain: domain record to be transferred between peers
45 * @len: actual size of domain record
46 * @gen: current generation of sender's domain
47 * @ack_gen: most recent generation of self's domain acked by peer
48 * @member_cnt: number of domain member nodes described in this record
49 * @up_map: bit map indicating which of the members the sender considers up
50 * @members: identity of the domain members
51 */
52struct tipc_mon_domain {
53 u16 len;
54 u16 gen;
55 u16 ack_gen;
56 u16 member_cnt;
57 u64 up_map;
58 u32 members[MAX_MON_DOMAIN];
59};
60
61/* struct tipc_peer: state of a peer node and its domain
62 * @addr: tipc node identity of peer
63 * @head_map: shows which other nodes currently consider peer 'up'
64 * @domain: most recent domain record from peer
65 * @hash: position in hashed lookup list
66 * @list: position in linked list, in circular ascending order by 'addr'
67 * @applied: number of reported domain members applied on this monitor list
68 * @is_up: peer is up as seen from this node
69 * @is_head: peer is assigned domain head as seen from this node
70 * @is_local: peer is in local domain and should be continuously monitored
71 * @down_cnt: - numbers of other peers which have reported this on lost
72 */
73struct tipc_peer {
74 u32 addr;
75 struct tipc_mon_domain *domain;
76 struct hlist_node hash;
77 struct list_head list;
78 u8 applied;
79 u8 down_cnt;
80 bool is_up;
81 bool is_head;
82 bool is_local;
83};
84
85struct tipc_monitor {
86 struct hlist_head peers[NODE_HTABLE_SIZE];
87 int peer_cnt;
88 struct tipc_peer *self;
89 rwlock_t lock;
90 struct tipc_mon_domain cache;
91 u16 list_gen;
92 u16 dom_gen;
93 struct net *net;
94 struct timer_list timer;
95 unsigned long timer_intv;
96};
97
98static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
99{
100 return tipc_net(net)->monitors[bearer_id];
101}
102
103const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
104
105/* dom_rec_len(): actual length of domain record for transport
106 */
107static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
108{
109 return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
110}
111
112/* dom_size() : calculate size of own domain based on number of peers
113 */
114static int dom_size(int peers)
115{
116 int i = 0;
117
118 while ((i * i) < peers)
119 i++;
120 return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
121}
122
123static void map_set(u64 *up_map, int i, unsigned int v)
124{
Dan Carpenter0350cb42016-06-17 12:22:26 +0300125 *up_map &= ~(1ULL << i);
126 *up_map |= ((u64)v << i);
Jon Paul Maloy35c55c92016-06-13 20:46:22 -0400127}
128
129static int map_get(u64 up_map, int i)
130{
131 return (up_map & (1 << i)) >> i;
132}
133
134static struct tipc_peer *peer_prev(struct tipc_peer *peer)
135{
136 return list_last_entry(&peer->list, struct tipc_peer, list);
137}
138
139static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
140{
141 return list_first_entry(&peer->list, struct tipc_peer, list);
142}
143
144static struct tipc_peer *peer_head(struct tipc_peer *peer)
145{
146 while (!peer->is_head)
147 peer = peer_prev(peer);
148 return peer;
149}
150
151static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
152{
153 struct tipc_peer *peer;
154 unsigned int thash = tipc_hashfn(addr);
155
156 hlist_for_each_entry(peer, &mon->peers[thash], hash) {
157 if (peer->addr == addr)
158 return peer;
159 }
160 return NULL;
161}
162
163static struct tipc_peer *get_self(struct net *net, int bearer_id)
164{
165 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
166
167 return mon->self;
168}
169
170static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
171{
172 struct tipc_net *tn = tipc_net(net);
173
174 return mon->peer_cnt > tn->mon_threshold;
175}
176
177/* mon_identify_lost_members() : - identify amd mark potentially lost members
178 */
179static void mon_identify_lost_members(struct tipc_peer *peer,
180 struct tipc_mon_domain *dom_bef,
181 int applied_bef)
182{
183 struct tipc_peer *member = peer;
184 struct tipc_mon_domain *dom_aft = peer->domain;
185 int applied_aft = peer->applied;
186 int i;
187
188 for (i = 0; i < applied_bef; i++) {
189 member = peer_nxt(member);
190
191 /* Do nothing if self or peer already see member as down */
192 if (!member->is_up || !map_get(dom_bef->up_map, i))
193 continue;
194
195 /* Loss of local node must be detected by active probing */
196 if (member->is_local)
197 continue;
198
199 /* Start probing if member was removed from applied domain */
200 if (!applied_aft || (applied_aft < i)) {
201 member->down_cnt = 1;
202 continue;
203 }
204
205 /* Member loss is confirmed if it is still in applied domain */
206 if (!map_get(dom_aft->up_map, i))
207 member->down_cnt++;
208 }
209}
210
211/* mon_apply_domain() : match a peer's domain record against monitor list
212 */
213static void mon_apply_domain(struct tipc_monitor *mon,
214 struct tipc_peer *peer)
215{
216 struct tipc_mon_domain *dom = peer->domain;
217 struct tipc_peer *member;
218 u32 addr;
219 int i;
220
221 if (!dom || !peer->is_up)
222 return;
223
224 /* Scan across domain members and match against monitor list */
225 peer->applied = 0;
226 member = peer_nxt(peer);
227 for (i = 0; i < dom->member_cnt; i++) {
228 addr = dom->members[i];
229 if (addr != member->addr)
230 return;
231 peer->applied++;
232 member = peer_nxt(member);
233 }
234}
235
236/* mon_update_local_domain() : update after peer addition/removal/up/down
237 */
238static void mon_update_local_domain(struct tipc_monitor *mon)
239{
240 struct tipc_peer *self = mon->self;
241 struct tipc_mon_domain *cache = &mon->cache;
242 struct tipc_mon_domain *dom = self->domain;
243 struct tipc_peer *peer = self;
244 u64 prev_up_map = dom->up_map;
245 u16 member_cnt, i;
246 bool diff;
247
248 /* Update local domain size based on current size of cluster */
249 member_cnt = dom_size(mon->peer_cnt) - 1;
250 self->applied = member_cnt;
251
252 /* Update native and cached outgoing local domain records */
253 dom->len = dom_rec_len(dom, member_cnt);
254 diff = dom->member_cnt != member_cnt;
255 dom->member_cnt = member_cnt;
256 for (i = 0; i < member_cnt; i++) {
257 peer = peer_nxt(peer);
258 diff |= dom->members[i] != peer->addr;
259 dom->members[i] = peer->addr;
260 map_set(&dom->up_map, i, peer->is_up);
261 cache->members[i] = htonl(peer->addr);
262 }
263 diff |= dom->up_map != prev_up_map;
264 if (!diff)
265 return;
266 dom->gen = ++mon->dom_gen;
267 cache->len = htons(dom->len);
268 cache->gen = htons(dom->gen);
269 cache->member_cnt = htons(member_cnt);
270 cache->up_map = cpu_to_be64(dom->up_map);
271 mon_apply_domain(mon, self);
272}
273
274/* mon_update_neighbors() : update preceding neighbors of added/removed peer
275 */
276static void mon_update_neighbors(struct tipc_monitor *mon,
277 struct tipc_peer *peer)
278{
279 int dz, i;
280
281 dz = dom_size(mon->peer_cnt);
282 for (i = 0; i < dz; i++) {
283 mon_apply_domain(mon, peer);
284 peer = peer_prev(peer);
285 }
286}
287
288/* mon_assign_roles() : reassign peer roles after a network change
289 * The monitor list is consistent at this stage; i.e., each peer is monitoring
290 * a set of domain members as matched between domain record and the monitor list
291 */
292static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
293{
294 struct tipc_peer *peer = peer_nxt(head);
295 struct tipc_peer *self = mon->self;
296 int i = 0;
297
298 for (; peer != self; peer = peer_nxt(peer)) {
299 peer->is_local = false;
300
301 /* Update domain member */
302 if (i++ < head->applied) {
303 peer->is_head = false;
304 if (head == self)
305 peer->is_local = true;
306 continue;
307 }
308 /* Assign next domain head */
309 if (!peer->is_up)
310 continue;
311 if (peer->is_head)
312 break;
313 head = peer;
314 head->is_head = true;
315 i = 0;
316 }
317 mon->list_gen++;
318}
319
320void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
321{
322 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
323 struct tipc_peer *self = get_self(net, bearer_id);
324 struct tipc_peer *peer, *prev, *head;
325
326 write_lock_bh(&mon->lock);
327 peer = get_peer(mon, addr);
328 if (!peer)
329 goto exit;
330 prev = peer_prev(peer);
331 list_del(&peer->list);
332 hlist_del(&peer->hash);
333 kfree(peer->domain);
334 kfree(peer);
335 mon->peer_cnt--;
336 head = peer_head(prev);
337 if (head == self)
338 mon_update_local_domain(mon);
339 mon_update_neighbors(mon, prev);
340
341 /* Revert to full-mesh monitoring if we reach threshold */
342 if (!tipc_mon_is_active(net, mon)) {
343 list_for_each_entry(peer, &self->list, list) {
344 kfree(peer->domain);
345 peer->domain = NULL;
346 peer->applied = 0;
347 }
348 }
349 mon_assign_roles(mon, head);
350exit:
351 write_unlock_bh(&mon->lock);
352}
353
354static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
355 struct tipc_peer **peer)
356{
357 struct tipc_peer *self = mon->self;
358 struct tipc_peer *cur, *prev, *p;
359
360 p = kzalloc(sizeof(*p), GFP_ATOMIC);
361 *peer = p;
362 if (!p)
363 return false;
364 p->addr = addr;
365
366 /* Add new peer to lookup list */
367 INIT_LIST_HEAD(&p->list);
368 hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
369
370 /* Sort new peer into iterator list, in ascending circular order */
371 prev = self;
372 list_for_each_entry(cur, &self->list, list) {
373 if ((addr > prev->addr) && (addr < cur->addr))
374 break;
375 if (((addr < cur->addr) || (addr > prev->addr)) &&
376 (prev->addr > cur->addr))
377 break;
378 prev = cur;
379 }
380 list_add_tail(&p->list, &cur->list);
381 mon->peer_cnt++;
382 mon_update_neighbors(mon, p);
383 return true;
384}
385
386void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
387{
388 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
389 struct tipc_peer *self = get_self(net, bearer_id);
390 struct tipc_peer *peer, *head;
391
392 write_lock_bh(&mon->lock);
393 peer = get_peer(mon, addr);
394 if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
395 goto exit;
396 peer->is_up = true;
397 head = peer_head(peer);
398 if (head == self)
399 mon_update_local_domain(mon);
400 mon_assign_roles(mon, head);
401exit:
402 write_unlock_bh(&mon->lock);
403}
404
405void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
406{
407 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
408 struct tipc_peer *self = get_self(net, bearer_id);
409 struct tipc_peer *peer, *head;
410 struct tipc_mon_domain *dom;
411 int applied;
412
413 write_lock_bh(&mon->lock);
414 peer = get_peer(mon, addr);
415 if (!peer) {
416 pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
417 goto exit;
418 }
419 applied = peer->applied;
420 peer->applied = 0;
421 dom = peer->domain;
422 peer->domain = NULL;
423 if (peer->is_head)
424 mon_identify_lost_members(peer, dom, applied);
425 kfree(dom);
426 peer->is_up = false;
427 peer->is_head = false;
428 peer->is_local = false;
429 peer->down_cnt = 0;
430 head = peer_head(peer);
431 if (head == self)
432 mon_update_local_domain(mon);
433 mon_assign_roles(mon, head);
434exit:
435 write_unlock_bh(&mon->lock);
436}
437
438/* tipc_mon_rcv - process monitor domain event message
439 */
440void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
441 struct tipc_mon_state *state, int bearer_id)
442{
443 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
444 struct tipc_mon_domain *arrv_dom = data;
445 struct tipc_mon_domain dom_bef;
446 struct tipc_mon_domain *dom;
447 struct tipc_peer *peer;
448 u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
449 int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
450 u16 new_gen = ntohs(arrv_dom->gen);
451 u16 acked_gen = ntohs(arrv_dom->ack_gen);
452 bool probing = state->probing;
453 int i, applied_bef;
454
455 state->probing = false;
456 if (!dlen)
457 return;
458
459 /* Sanity check received domain record */
460 if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
461 pr_warn_ratelimited("Received illegal domain record\n");
462 return;
463 }
464
465 /* Synch generation numbers with peer if link just came up */
466 if (!state->synched) {
467 state->peer_gen = new_gen - 1;
468 state->acked_gen = acked_gen;
469 state->synched = true;
470 }
471
472 if (more(acked_gen, state->acked_gen))
473 state->acked_gen = acked_gen;
474
475 /* Drop duplicate unless we are waiting for a probe response */
476 if (!more(new_gen, state->peer_gen) && !probing)
477 return;
478
479 write_lock_bh(&mon->lock);
480 peer = get_peer(mon, addr);
481 if (!peer || !peer->is_up)
482 goto exit;
483
484 /* Peer is confirmed, stop any ongoing probing */
485 peer->down_cnt = 0;
486
487 /* Task is done for duplicate record */
488 if (!more(new_gen, state->peer_gen))
489 goto exit;
490
491 state->peer_gen = new_gen;
492
493 /* Cache current domain record for later use */
494 dom_bef.member_cnt = 0;
495 dom = peer->domain;
496 if (dom)
497 memcpy(&dom_bef, dom, dom->len);
498
499 /* Transform and store received domain record */
500 if (!dom || (dom->len < new_dlen)) {
501 kfree(dom);
502 dom = kmalloc(new_dlen, GFP_ATOMIC);
503 peer->domain = dom;
504 if (!dom)
505 goto exit;
506 }
507 dom->len = new_dlen;
508 dom->gen = new_gen;
509 dom->member_cnt = new_member_cnt;
510 dom->up_map = be64_to_cpu(arrv_dom->up_map);
511 for (i = 0; i < new_member_cnt; i++)
512 dom->members[i] = ntohl(arrv_dom->members[i]);
513
514 /* Update peers affected by this domain record */
515 applied_bef = peer->applied;
516 mon_apply_domain(mon, peer);
517 mon_identify_lost_members(peer, &dom_bef, applied_bef);
518 mon_assign_roles(mon, peer_head(peer));
519exit:
520 write_unlock_bh(&mon->lock);
521}
522
523void tipc_mon_prep(struct net *net, void *data, int *dlen,
524 struct tipc_mon_state *state, int bearer_id)
525{
526 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
527 struct tipc_mon_domain *dom = data;
528 u16 gen = mon->dom_gen;
529 u16 len;
530
531 if (!tipc_mon_is_active(net, mon))
532 return;
533
534 /* Send only a dummy record with ack if peer has acked our last sent */
535 if (likely(state->acked_gen == gen)) {
536 len = dom_rec_len(dom, 0);
537 *dlen = len;
538 dom->len = htons(len);
539 dom->gen = htons(gen);
540 dom->ack_gen = htons(state->peer_gen);
541 dom->member_cnt = 0;
542 return;
543 }
544 /* Send the full record */
545 read_lock_bh(&mon->lock);
546 len = ntohs(mon->cache.len);
547 *dlen = len;
548 memcpy(data, &mon->cache, len);
549 read_unlock_bh(&mon->lock);
550 dom->ack_gen = htons(state->peer_gen);
551}
552
553void tipc_mon_get_state(struct net *net, u32 addr,
554 struct tipc_mon_state *state,
555 int bearer_id)
556{
557 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
558 struct tipc_peer *peer;
559
560 /* Used cached state if table has not changed */
561 if (!state->probing &&
562 (state->list_gen == mon->list_gen) &&
563 (state->acked_gen == mon->dom_gen))
564 return;
565
566 read_lock_bh(&mon->lock);
567 peer = get_peer(mon, addr);
568 if (peer) {
569 state->probing = state->acked_gen != mon->dom_gen;
570 state->probing |= peer->down_cnt;
571 state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
572 state->monitoring = peer->is_local;
573 state->monitoring |= peer->is_head;
574 state->list_gen = mon->list_gen;
575 }
576 read_unlock_bh(&mon->lock);
577}
578
579static void mon_timeout(unsigned long m)
580{
581 struct tipc_monitor *mon = (void *)m;
582 struct tipc_peer *self;
583 int best_member_cnt = dom_size(mon->peer_cnt) - 1;
584
585 write_lock_bh(&mon->lock);
586 self = mon->self;
587 if (self && (best_member_cnt != self->applied)) {
588 mon_update_local_domain(mon);
589 mon_assign_roles(mon, self);
590 }
591 write_unlock_bh(&mon->lock);
592 mod_timer(&mon->timer, jiffies + mon->timer_intv);
593}
594
595int tipc_mon_create(struct net *net, int bearer_id)
596{
597 struct tipc_net *tn = tipc_net(net);
598 struct tipc_monitor *mon;
599 struct tipc_peer *self;
600 struct tipc_mon_domain *dom;
601
602 if (tn->monitors[bearer_id])
603 return 0;
604
605 mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
606 self = kzalloc(sizeof(*self), GFP_ATOMIC);
607 dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
608 if (!mon || !self || !dom) {
609 kfree(mon);
610 kfree(self);
611 kfree(dom);
612 return -ENOMEM;
613 }
614 tn->monitors[bearer_id] = mon;
615 rwlock_init(&mon->lock);
616 mon->net = net;
617 mon->peer_cnt = 1;
618 mon->self = self;
619 self->domain = dom;
620 self->addr = tipc_own_addr(net);
621 self->is_up = true;
622 self->is_head = true;
623 INIT_LIST_HEAD(&self->list);
624 setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
625 mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
626 mod_timer(&mon->timer, jiffies + mon->timer_intv);
627 return 0;
628}
629
630void tipc_mon_delete(struct net *net, int bearer_id)
631{
632 struct tipc_net *tn = tipc_net(net);
633 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
634 struct tipc_peer *self = get_self(net, bearer_id);
635 struct tipc_peer *peer, *tmp;
636
637 write_lock_bh(&mon->lock);
638 tn->monitors[bearer_id] = NULL;
639 list_for_each_entry_safe(peer, tmp, &self->list, list) {
640 list_del(&peer->list);
641 hlist_del(&peer->hash);
642 kfree(peer->domain);
643 kfree(peer);
644 }
645 mon->self = NULL;
646 write_unlock_bh(&mon->lock);
647 del_timer_sync(&mon->timer);
648 kfree(self->domain);
649 kfree(self);
650 kfree(mon);
651}