blob: 4ea64e93e6b19ea24132606cffaca8bb502d18ab [file] [log] [blame]
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/

/*
 * lowcomms.c
 *
 * This is the "low-level" comms layer.
 *
 * It is responsible for sending/receiving messages
 * from other nodes in the cluster.
 *
 * Cluster nodes are referred to by their nodeids. nodeids are
 * simply 32 bit numbers to the locking module - if they need to
 * be expanded for the cluster infrastructure then that is its
 * responsibility. It is this layer's
 * responsibility to resolve these into IP address or
 * whatever it needs for inter-node communication.
 *
 * The comms level is two kernel threads that deal mainly with
 * the receiving of messages from other nodes and passing them
 * up to the mid-level comms layer (which understands the
 * message format) for execution by the locking core, and
 * a send thread which does all the setting up of connections
 * to remote nodes and the sending of data. Threads are not allowed
 * to send their own data because it may cause them to wait in times
 * of high load. Also, this way, the sending thread can collect together
 * messages bound for one node and send them in one block.
 *
 * lowcomms will choose to use either TCP or SCTP as its transport layer
 * depending on the configuration variable 'protocol'. This should be set
 * to 0 (default) for TCP or 1 for SCTP. It should be configured using a
 * cluster-wide mechanism as it must be the same on all nodes of the cluster
 * for the DLM to function.
 *
 */
46
47#include <asm/ioctls.h>
48#include <net/sock.h>
49#include <net/tcp.h>
50#include <linux/pagemap.h>
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +010051#include <linux/file.h>
Matthias Kaehlcke7a936ce2008-05-12 10:04:51 -050052#include <linux/mutex.h>
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +010053#include <linux/sctp.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090054#include <linux/slab.h>
Benjamin Poirier2f2d76c2012-03-08 05:55:59 +000055#include <net/sctp/sctp.h>
Joe Perches44ad5322009-01-22 13:24:49 -080056#include <net/ipv6.h>
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +010057
58#include "dlm_internal.h"
59#include "lowcomms.h"
60#include "midcomms.h"
61#include "config.h"
62
/* Receive buffer space requested for sockets (bytes) */
#define NEEDED_RMEM (4*1024*1024)
/* Buckets in connection_hash; must be a power of two (see nodeid_hash()) */
#define CONN_HASH_SIZE 32

/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
68
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +010069struct cbuf {
70 unsigned int base;
71 unsigned int len;
72 unsigned int mask;
73};
74
75static void cbuf_add(struct cbuf *cb, int n)
76{
77 cb->len += n;
78}
79
80static int cbuf_data(struct cbuf *cb)
81{
82 return ((cb->base + cb->len) & cb->mask);
83}
84
85static void cbuf_init(struct cbuf *cb, int size)
86{
87 cb->base = cb->len = 0;
88 cb->mask = size-1;
89}
90
91static void cbuf_eat(struct cbuf *cb, int n)
92{
93 cb->len -= n;
94 cb->base += n;
95 cb->base &= cb->mask;
96}
97
98static bool cbuf_empty(struct cbuf *cb)
99{
100 return cb->len == 0;
101}
102
/*
 * Per-node connection state.  One exists for each cluster node we talk to
 * (hashed by nodeid in connection_hash).  "othercon" holds a second,
 * accepted socket for the case where two nodes connect simultaneously.
 */
struct connection {
	struct socket *sock;	/* NULL if not connected */
	uint32_t nodeid;	/* So we know who we are in the list */
	struct mutex sock_mutex;	/* serializes sock/rx_page teardown vs use */
	unsigned long flags;	/* CF_* bits below */
#define CF_READ_PENDING 1	/* rwork queued/running */
#define CF_WRITE_PENDING 2	/* swork queued/running */
#define CF_CONNECT_PENDING 3	/* connect requested, handled by swork */
#define CF_INIT_PENDING 4
#define CF_IS_OTHERCON 5	/* secondary (accepted) connection */
#define CF_CLOSE 6		/* closing; suppress reconnect attempts */
#define CF_APP_LIMITED 7	/* cleared in lowcomms_write_space() */
	struct list_head writequeue;	/* List of outgoing writequeue_entries */
	spinlock_t writequeue_lock;
	int (*rx_action) (struct connection *);	/* What to do when active */
	void (*connect_action) (struct connection *); /* What to do to connect */
	struct page *rx_page;	/* circular receive buffer, offsets in cb */
	struct cbuf cb;
	int retries;
#define MAX_CONNECT_RETRIES 3
	struct hlist_node list;	/* link in connection_hash bucket */
	struct connection *othercon;
	struct work_struct rwork; /* Receive workqueue */
	struct work_struct swork; /* Send workqueue */
};

/* Map a struct sock back to its owning connection (set in add_sock()) */
#define sock2con(x) ((struct connection *)(x)->sk_user_data)
129
/* An entry waiting to be sent: a chunk of outgoing data held in one page.
 * "offset".."offset+len" is the unsent region; "end" is where new data is
 * appended; "users" counts writers still filling the page. */
struct writequeue_entry {
	struct list_head list;	/* link in con->writequeue */
	struct page *page;	/* backing store for the data */
	int offset;		/* start of the not-yet-sent bytes */
	int len;		/* number of bytes still to send */
	int end;		/* append point for new messages */
	int users;		/* writers currently using this entry */
	struct connection *con;	/* owning connection */
};
140
David Teigland36b71a82012-07-26 12:44:30 -0500141struct dlm_node_addr {
142 struct list_head list;
143 int nodeid;
144 int addr_count;
Mike Christie98e1b602013-06-14 04:56:12 -0500145 int curr_addr_index;
David Teigland36b71a82012-07-26 12:44:30 -0500146 struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
147};
148
/* Known node addresses, populated by dlm_lowcomms_addr() */
static LIST_HEAD(dlm_node_addrs);
static DEFINE_SPINLOCK(dlm_node_addrs_spin);

/* Our own local addresses and how many of them are set */
static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
static int dlm_local_count;
/* Gate for accepting connections; checked under connections_lock */
static int dlm_allow_conn;

/* Work queues */
static struct workqueue_struct *recv_workqueue;
static struct workqueue_struct *send_workqueue;

/* nodeid -> struct connection map, guarded by connections_lock */
static struct hlist_head connection_hash[CONN_HASH_SIZE];
static DEFINE_MUTEX(connections_lock);
/* Allocator for struct connection objects */
static struct kmem_cache *con_cache;

static void process_recv_sockets(struct work_struct *work);
static void process_send_sockets(struct work_struct *work);
166
Christine Caulfield5e9ccc32009-01-28 12:57:40 -0600167
/* This is deliberately very simple because most clusters have simple
   sequential nodeids, so we should be able to go straight to a connection
   struct in the array */
static inline int nodeid_hash(int nodeid)
{
	/* CONN_HASH_SIZE is a power of two: this is nodeid mod table size */
	return nodeid & (CONN_HASH_SIZE-1);
}
175
/*
 * Look up the connection for a nodeid in connection_hash, or return NULL.
 * NOTE(review): insertion happens under connections_lock (__nodeid2con);
 * confirm all lookup paths hold it too.
 */
static struct connection *__find_con(int nodeid)
{
	int r;
	struct connection *con;

	r = nodeid_hash(nodeid);

	hlist_for_each_entry(con, &connection_hash[r], list) {
		if (con->nodeid == nodeid)
			return con;
	}
	return NULL;
}
189
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100190/*
191 * If 'allocation' is zero then we don't attempt to create a new
192 * connection structure for this node.
193 */
194static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
195{
196 struct connection *con = NULL;
197 int r;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100198
Christine Caulfield5e9ccc32009-01-28 12:57:40 -0600199 con = __find_con(nodeid);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100200 if (con || !alloc)
201 return con;
202
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100203 con = kmem_cache_zalloc(con_cache, alloc);
204 if (!con)
205 return NULL;
206
Christine Caulfield5e9ccc32009-01-28 12:57:40 -0600207 r = nodeid_hash(nodeid);
208 hlist_add_head(&con->list, &connection_hash[r]);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100209
210 con->nodeid = nodeid;
211 mutex_init(&con->sock_mutex);
212 INIT_LIST_HEAD(&con->writequeue);
213 spin_lock_init(&con->writequeue_lock);
214 INIT_WORK(&con->swork, process_send_sockets);
215 INIT_WORK(&con->rwork, process_recv_sockets);
216
217 /* Setup action pointers for child sockets */
218 if (con->nodeid) {
Christine Caulfield5e9ccc32009-01-28 12:57:40 -0600219 struct connection *zerocon = __find_con(0);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100220
221 con->connect_action = zerocon->connect_action;
222 if (!con->rx_action)
223 con->rx_action = zerocon->rx_action;
224 }
225
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100226 return con;
227}
228
Christine Caulfield5e9ccc32009-01-28 12:57:40 -0600229/* Loop round all connections */
230static void foreach_conn(void (*conn_func)(struct connection *c))
231{
232 int i;
Sasha Levinb67bfe02013-02-27 17:06:00 -0800233 struct hlist_node *n;
Christine Caulfield5e9ccc32009-01-28 12:57:40 -0600234 struct connection *con;
235
236 for (i = 0; i < CONN_HASH_SIZE; i++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -0800237 hlist_for_each_entry_safe(con, n, &connection_hash[i], list)
Christine Caulfield5e9ccc32009-01-28 12:57:40 -0600238 conn_func(con);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -0600239 }
240}
241
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100242static struct connection *nodeid2con(int nodeid, gfp_t allocation)
243{
244 struct connection *con;
245
Matthias Kaehlcke7a936ce2008-05-12 10:04:51 -0500246 mutex_lock(&connections_lock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100247 con = __nodeid2con(nodeid, allocation);
Matthias Kaehlcke7a936ce2008-05-12 10:04:51 -0500248 mutex_unlock(&connections_lock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100249
250 return con;
251}
252
David Teigland36b71a82012-07-26 12:44:30 -0500253static struct dlm_node_addr *find_node_addr(int nodeid)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100254{
David Teigland36b71a82012-07-26 12:44:30 -0500255 struct dlm_node_addr *na;
256
257 list_for_each_entry(na, &dlm_node_addrs, list) {
258 if (na->nodeid == nodeid)
259 return na;
260 }
261 return NULL;
262}
263
/*
 * Compare two socket addresses (address and port): 1 if equal, 0 if not.
 * Only x's address family is examined; y is interpreted as the same
 * family.  Unknown families never match.
 */
static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y)
{
	switch (x->ss_family) {
	case AF_INET: {
		struct sockaddr_in *ax = (struct sockaddr_in *)x;
		struct sockaddr_in *ay = (struct sockaddr_in *)y;

		return ax->sin_addr.s_addr == ay->sin_addr.s_addr &&
		       ax->sin_port == ay->sin_port;
	}
	case AF_INET6: {
		struct sockaddr_in6 *ax6 = (struct sockaddr_in6 *)x;
		struct sockaddr_in6 *ay6 = (struct sockaddr_in6 *)y;

		return ipv6_addr_equal(&ax6->sin6_addr, &ay6->sin6_addr) &&
		       ax6->sin6_port == ay6->sin6_port;
	}
	default:
		return 0;
	}
}
290
/*
 * Resolve a nodeid to one of its addresses.  Copies the current address
 * into *sas_out (full sockaddr_storage) and/or *sa_out (address part
 * only, family taken from our first local address).  When try_new_addr
 * is set, rotate to the next stored address for the following attempt.
 *
 * Returns 0 on success, -1 if we have no local addresses, -EEXIST if the
 * node is unknown, -ENOENT if it has no addresses.
 */
static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
			  struct sockaddr *sa_out, bool try_new_addr)
{
	struct sockaddr_storage sas;
	struct dlm_node_addr *na;

	if (!dlm_local_count)
		return -1;

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (na && na->addr_count) {
		/* snapshot under the lock; everything below uses the copy */
		memcpy(&sas, na->addr[na->curr_addr_index],
		       sizeof(struct sockaddr_storage));

		if (try_new_addr) {
			na->curr_addr_index++;
			if (na->curr_addr_index == na->addr_count)
				na->curr_addr_index = 0;
		}
	}
	spin_unlock(&dlm_node_addrs_spin);

	/* NOTE(review): na->addr_count is read after dropping the lock;
	 * relies on entries never being removed concurrently - confirm. */
	if (!na)
		return -EEXIST;

	if (!na->addr_count)
		return -ENOENT;

	if (sas_out)
		memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));

	if (!sa_out)
		return 0;

	if (dlm_local_addr[0]->ss_family == AF_INET) {
		struct sockaddr_in *in4 = (struct sockaddr_in *) &sas;
		struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
	} else {
		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas;
		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out;
		ret6->sin6_addr = in6->sin6_addr;
	}

	return 0;
}
338
David Teigland36b71a82012-07-26 12:44:30 -0500339static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
340{
341 struct dlm_node_addr *na;
342 int rv = -EEXIST;
Mike Christie98e1b602013-06-14 04:56:12 -0500343 int addr_i;
David Teigland36b71a82012-07-26 12:44:30 -0500344
345 spin_lock(&dlm_node_addrs_spin);
346 list_for_each_entry(na, &dlm_node_addrs, list) {
347 if (!na->addr_count)
348 continue;
349
Mike Christie98e1b602013-06-14 04:56:12 -0500350 for (addr_i = 0; addr_i < na->addr_count; addr_i++) {
351 if (addr_compare(na->addr[addr_i], addr)) {
352 *nodeid = na->nodeid;
353 rv = 0;
354 goto unlock;
355 }
356 }
David Teigland36b71a82012-07-26 12:44:30 -0500357 }
Mike Christie98e1b602013-06-14 04:56:12 -0500358unlock:
David Teigland36b71a82012-07-26 12:44:30 -0500359 spin_unlock(&dlm_node_addrs_spin);
360 return rv;
361}
362
/*
 * Register an address for a nodeid (called from configfs setup).
 * The first address for a node creates its dlm_node_addr entry; later
 * calls append up to DLM_MAX_ADDR_COUNT addresses.
 *
 * @len: number of bytes of *addr to copy (caller must pass a length
 *       no larger than sizeof(struct sockaddr_storage)).
 *
 * Returns 0, -ENOMEM on allocation failure, or -ENOSPC when the node
 * already has the maximum number of addresses.
 */
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
{
	struct sockaddr_storage *new_addr;
	struct dlm_node_addr *new_node, *na;

	/* Allocate both pieces up front so the locked section cannot fail */
	new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS);
	if (!new_node)
		return -ENOMEM;

	new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS);
	if (!new_addr) {
		kfree(new_node);
		return -ENOMEM;
	}

	memcpy(new_addr, addr, len);

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (!na) {
		new_node->nodeid = nodeid;
		new_node->addr[0] = new_addr;
		new_node->addr_count = 1;
		list_add(&new_node->list, &dlm_node_addrs);
		spin_unlock(&dlm_node_addrs_spin);
		return 0;
	}

	if (na->addr_count >= DLM_MAX_ADDR_COUNT) {
		spin_unlock(&dlm_node_addrs_spin);
		kfree(new_addr);
		kfree(new_node);
		return -ENOSPC;
	}

	/* Node already known: keep its entry, just add the address */
	na->addr[na->addr_count++] = new_addr;
	spin_unlock(&dlm_node_addrs_spin);
	kfree(new_node);
	return 0;
}
403
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100404/* Data available on socket or listen socket received a connect */
David S. Miller676d2362014-04-11 16:15:36 -0400405static void lowcomms_data_ready(struct sock *sk)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100406{
407 struct connection *con = sock2con(sk);
Patrick Caulfieldafb853f2007-06-01 10:07:26 -0500408 if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100409 queue_work(recv_workqueue, &con->rwork);
410}
411
/* Socket has write space again: undo the app-limited bookkeeping and
 * kick the send work once. */
static void lowcomms_write_space(struct sock *sk)
{
	struct connection *con = sock2con(sk);

	if (!con)
		return;

	clear_bit(SOCK_NOSPACE, &con->sock->flags);

	/* Balance the accounting done when the sender hit ENOBUFS */
	if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
		con->sock->sk->sk_write_pending--;
		clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags);
	}

	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
		queue_work(send_workqueue, &con->swork);
}
429
/* Request an (asynchronous) connect for this connection unless it is
 * being closed; the send worker performs the actual connect. */
static inline void lowcomms_connect_sock(struct connection *con)
{
	if (test_bit(CF_CLOSE, &con->flags))
		return;
	if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
		queue_work(send_workqueue, &con->swork);
}
437
static void lowcomms_state_change(struct sock *sk)
{
	/* SCTP layer is not calling sk_data_ready when the connection
	 * is done, so we catch the signal through here. Also, it
	 * doesn't switch socket state when entering shutdown, so we
	 * skip the write in that case.
	 */
	if (sk->sk_shutdown) {
		if (sk->sk_shutdown == RCV_SHUTDOWN)
			lowcomms_data_ready(sk);
	} else if (sk->sk_state == TCP_ESTABLISHED) {
		lowcomms_write_space(sk);
	}
}
452
Christine Caulfield391fbdc2009-05-07 10:54:16 -0500453int dlm_lowcomms_connect_node(int nodeid)
454{
455 struct connection *con;
456
457 if (nodeid == dlm_our_nodeid())
458 return 0;
459
460 con = nodeid2con(nodeid, GFP_NOFS);
461 if (!con)
462 return -ENOMEM;
463 lowcomms_connect_sock(con);
464 return 0;
465}
466
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100467/* Make a socket active */
Ying Xue4dd40f02012-08-10 14:58:42 +0800468static void add_sock(struct socket *sock, struct connection *con)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100469{
470 con->sock = sock;
471
472 /* Install a data_ready callback */
473 con->sock->sk->sk_data_ready = lowcomms_data_ready;
474 con->sock->sk->sk_write_space = lowcomms_write_space;
475 con->sock->sk->sk_state_change = lowcomms_state_change;
476 con->sock->sk->sk_user_data = con;
Steven Whitehoused6d7b702008-11-12 16:49:48 -0600477 con->sock->sk->sk_allocation = GFP_NOFS;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100478}
479
/* Add the port number to an IPv6 or 4 sockaddr and return the address
   length.  The family is forced to match our first local address, and
   any trailing bytes of the storage are zeroed so the result compares
   cleanly with memcmp-style users. */
static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
			  int *addr_len)
{
	saddr->ss_family = dlm_local_addr[0]->ss_family;
	if (saddr->ss_family == AF_INET) {
		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
		in4_addr->sin_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in);
		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
	} else {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
		in6_addr->sin6_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in6);
	}
	memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len);
}
498
/* Close a remote connection and tidy up.
 * @and_other: also close the paired othercon (recurses exactly once).
 * @tx/@rx: cancel the corresponding work items first; the caller passes
 * false for the side it is itself running on, to avoid self-cancel. */
static void close_connection(struct connection *con, bool and_other,
			     bool tx, bool rx)
{
	/* Stop new work being queued before cancelling what is pending */
	clear_bit(CF_CONNECT_PENDING, &con->flags);
	clear_bit(CF_WRITE_PENDING, &con->flags);
	if (tx && cancel_work_sync(&con->swork))
		log_print("canceled swork for node %d", con->nodeid);
	if (rx && cancel_work_sync(&con->rwork))
		log_print("canceled rwork for node %d", con->nodeid);

	mutex_lock(&con->sock_mutex);
	if (con->sock) {
		sock_release(con->sock);
		con->sock = NULL;
	}
	if (con->othercon && and_other) {
		/* Will only re-enter once. */
		close_connection(con->othercon, false, true, true);
	}
	if (con->rx_page) {
		__free_page(con->rx_page);
		con->rx_page = NULL;
	}

	con->retries = 0;
	mutex_unlock(&con->sock_mutex);
}
527
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100528/* Data received from remote end */
529static int receive_from_sock(struct connection *con)
530{
531 int ret = 0;
532 struct msghdr msg = {};
533 struct kvec iov[2];
534 unsigned len;
535 int r;
536 int call_again_soon = 0;
537 int nvec;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100538
539 mutex_lock(&con->sock_mutex);
540
541 if (con->sock == NULL) {
542 ret = -EAGAIN;
543 goto out_close;
544 }
Marcelo Ricardo Leitneracee4e52015-08-11 19:22:24 -0300545 if (con->nodeid == 0) {
546 ret = -EINVAL;
547 goto out_close;
548 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100549
550 if (con->rx_page == NULL) {
551 /*
552 * This doesn't need to be atomic, but I think it should
553 * improve performance if it is.
554 */
555 con->rx_page = alloc_page(GFP_ATOMIC);
556 if (con->rx_page == NULL)
557 goto out_resched;
558 cbuf_init(&con->cb, PAGE_CACHE_SIZE);
559 }
560
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100561 /*
562 * iov[0] is the bit of the circular buffer between the current end
563 * point (cb.base + cb.len) and the end of the buffer.
564 */
565 iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
566 iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
567 iov[1].iov_len = 0;
568 nvec = 1;
569
570 /*
571 * iov[1] is the bit of the circular buffer between the start of the
572 * buffer and the start of the currently used section (cb.base)
573 */
574 if (cbuf_data(&con->cb) >= con->cb.base) {
575 iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
576 iov[1].iov_len = con->cb.base;
577 iov[1].iov_base = page_address(con->rx_page);
578 nvec = 2;
579 }
580 len = iov[0].iov_len + iov[1].iov_len;
581
582 r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len,
583 MSG_DONTWAIT | MSG_NOSIGNAL);
584 if (ret <= 0)
585 goto out_close;
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300586 else if (ret == len)
587 call_again_soon = 1;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100588
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100589 cbuf_add(&con->cb, ret);
590 ret = dlm_process_incoming_buffer(con->nodeid,
591 page_address(con->rx_page),
592 con->cb.base, con->cb.len,
593 PAGE_CACHE_SIZE);
594 if (ret == -EBADMSG) {
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300595 log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d",
596 page_address(con->rx_page), con->cb.base,
597 con->cb.len, r);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100598 }
599 if (ret < 0)
600 goto out_close;
601 cbuf_eat(&con->cb, ret);
602
603 if (cbuf_empty(&con->cb) && !call_again_soon) {
604 __free_page(con->rx_page);
605 con->rx_page = NULL;
606 }
607
608 if (call_again_soon)
609 goto out_resched;
610 mutex_unlock(&con->sock_mutex);
611 return 0;
612
613out_resched:
614 if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
615 queue_work(recv_workqueue, &con->rwork);
616 mutex_unlock(&con->sock_mutex);
617 return -EAGAIN;
618
619out_close:
620 mutex_unlock(&con->sock_mutex);
Patrick Caulfield9e5f2822007-08-02 14:58:14 +0100621 if (ret != -EAGAIN) {
Marcelo Ricardo Leitner0d737a82015-08-11 19:22:21 -0300622 close_connection(con, false, true, false);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100623 /* Reconnect when there is something to send */
624 }
625 /* Don't return success if we really got EOF */
626 if (ret == 0)
627 ret = -EAGAIN;
628
629 return ret;
630}
631
/* Listening socket is busy, accept a connection.
 *
 * Accepts the new TCP socket, maps the peer address to a nodeid via
 * addr_to_nodeid(), and attaches the socket to that node's connection
 * (or to ->othercon when both nodes connected simultaneously).
 * Returns 0 on success, a negative errno, or -1 for non-cluster peers
 * or when connections are disallowed. */
static int tcp_accept_from_sock(struct connection *con)
{
	int result;
	struct sockaddr_storage peeraddr;
	struct socket *newsock;
	int len;
	int nodeid;
	struct connection *newcon;
	struct connection *addcon;

	mutex_lock(&connections_lock);
	if (!dlm_allow_conn) {
		mutex_unlock(&connections_lock);
		return -1;
	}
	mutex_unlock(&connections_lock);

	memset(&peeraddr, 0, sizeof(peeraddr));
	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
				  SOCK_STREAM, IPPROTO_TCP, &newsock);
	if (result < 0)
		return -ENOMEM;

	/* nesting level 0: listening con locked before the per-node con */
	mutex_lock_nested(&con->sock_mutex, 0);

	result = -ENOTCONN;
	if (con->sock == NULL)
		goto accept_err;

	newsock->type = con->sock->type;
	newsock->ops = con->sock->ops;

	result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
	if (result < 0)
		goto accept_err;

	/* Get the connected socket's peer */
	memset(&peeraddr, 0, sizeof(peeraddr));
	if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
				  &len, 2)) {
		result = -ECONNABORTED;
		goto accept_err;
	}

	/* Get the new node's NODEID */
	make_sockaddr(&peeraddr, 0, &len);
	if (addr_to_nodeid(&peeraddr, &nodeid)) {
		unsigned char *b=(unsigned char *)&peeraddr;
		log_print("connect from non cluster node");
		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
				     b, sizeof(struct sockaddr_storage));
		sock_release(newsock);
		mutex_unlock(&con->sock_mutex);
		return -1;
	}

	log_print("got connection from %d", nodeid);

	/* Check to see if we already have a connection to this node. This
	 * could happen if the two nodes initiate a connection at roughly
	 * the same time and the connections cross on the wire.
	 * In this case we store the incoming one in "othercon"
	 */
	newcon = nodeid2con(nodeid, GFP_NOFS);
	if (!newcon) {
		result = -ENOMEM;
		goto accept_err;
	}
	mutex_lock_nested(&newcon->sock_mutex, 1);
	if (newcon->sock) {
		struct connection *othercon = newcon->othercon;

		if (!othercon) {
			othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
			if (!othercon) {
				log_print("failed to allocate incoming socket");
				mutex_unlock(&newcon->sock_mutex);
				result = -ENOMEM;
				goto accept_err;
			}
			othercon->nodeid = nodeid;
			othercon->rx_action = receive_from_sock;
			mutex_init(&othercon->sock_mutex);
			INIT_WORK(&othercon->swork, process_send_sockets);
			INIT_WORK(&othercon->rwork, process_recv_sockets);
			set_bit(CF_IS_OTHERCON, &othercon->flags);
		}
		if (!othercon->sock) {
			newcon->othercon = othercon;
			othercon->sock = newsock;
			newsock->sk->sk_user_data = othercon;
			add_sock(newsock, othercon);
			addcon = othercon;
		}
		else {
			/* both primary and othercon are in use already */
			printk("Extra connection from node %d attempted\n", nodeid);
			result = -EAGAIN;
			mutex_unlock(&newcon->sock_mutex);
			goto accept_err;
		}
	}
	else {
		newsock->sk->sk_user_data = newcon;
		newcon->rx_action = receive_from_sock;
		add_sock(newsock, newcon);
		addcon = newcon;
	}

	mutex_unlock(&newcon->sock_mutex);

	/*
	 * Add it to the active queue in case we got data
	 * between processing the accept adding the socket
	 * to the read_sockets list
	 */
	if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
		queue_work(recv_workqueue, &addcon->rwork);
	mutex_unlock(&con->sock_mutex);

	return 0;

accept_err:
	mutex_unlock(&con->sock_mutex);
	sock_release(newsock);

	if (result != -EAGAIN)
		log_print("error accepting connection from node: %d", result);
	return result;
}
762
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300763int sctp_accept_from_sock(struct connection *con)
764{
765 /* Check that the new node is in the lockspace */
766 struct sctp_prim prim;
767 int nodeid;
768 int prim_len, ret;
769 int addr_len;
770 struct connection *newcon;
771 struct connection *addcon;
772 struct socket *newsock;
773
774 mutex_lock(&connections_lock);
775 if (!dlm_allow_conn) {
776 mutex_unlock(&connections_lock);
777 return -1;
778 }
779 mutex_unlock(&connections_lock);
780
781 mutex_lock_nested(&con->sock_mutex, 0);
782
783 ret = kernel_accept(con->sock, &newsock, O_NONBLOCK);
784 if (ret < 0)
785 goto accept_err;
786
787 memset(&prim, 0, sizeof(struct sctp_prim));
788 prim_len = sizeof(struct sctp_prim);
789
790 ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
791 (char *)&prim, &prim_len);
792 if (ret < 0) {
793 log_print("getsockopt/sctp_primary_addr failed: %d", ret);
794 goto accept_err;
795 }
796
797 make_sockaddr(&prim.ssp_addr, 0, &addr_len);
798 if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
799 unsigned char *b = (unsigned char *)&prim.ssp_addr;
800
801 log_print("reject connect from unknown addr");
802 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
803 b, sizeof(struct sockaddr_storage));
804 goto accept_err;
805 }
806
807 newcon = nodeid2con(nodeid, GFP_NOFS);
808 if (!newcon) {
809 ret = -ENOMEM;
810 goto accept_err;
811 }
812
813 mutex_lock_nested(&newcon->sock_mutex, 1);
814
815 if (newcon->sock) {
816 struct connection *othercon = newcon->othercon;
817
818 if (!othercon) {
819 othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
820 if (!othercon) {
821 log_print("failed to allocate incoming socket");
822 mutex_unlock(&newcon->sock_mutex);
823 ret = -ENOMEM;
824 goto accept_err;
825 }
826 othercon->nodeid = nodeid;
827 othercon->rx_action = receive_from_sock;
828 mutex_init(&othercon->sock_mutex);
829 INIT_WORK(&othercon->swork, process_send_sockets);
830 INIT_WORK(&othercon->rwork, process_recv_sockets);
831 set_bit(CF_IS_OTHERCON, &othercon->flags);
832 }
833 if (!othercon->sock) {
834 newcon->othercon = othercon;
835 othercon->sock = newsock;
836 newsock->sk->sk_user_data = othercon;
837 add_sock(newsock, othercon);
838 addcon = othercon;
839 } else {
840 printk("Extra connection from node %d attempted\n", nodeid);
841 ret = -EAGAIN;
842 mutex_unlock(&newcon->sock_mutex);
843 goto accept_err;
844 }
845 } else {
846 newsock->sk->sk_user_data = newcon;
847 newcon->rx_action = receive_from_sock;
848 add_sock(newsock, newcon);
849 addcon = newcon;
850 }
851
852 log_print("connected to %d", nodeid);
853
854 mutex_unlock(&newcon->sock_mutex);
855
856 /*
857 * Add it to the active queue in case we got data
858 * between processing the accept adding the socket
859 * to the read_sockets list
860 */
861 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
862 queue_work(recv_workqueue, &addcon->rwork);
863 mutex_unlock(&con->sock_mutex);
864
865 return 0;
866
867accept_err:
868 mutex_unlock(&con->sock_mutex);
869 if (newsock)
870 sock_release(newsock);
871 if (ret != -EAGAIN)
872 log_print("error accepting connection from node: %d", ret);
873
874 return ret;
875}
876
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100877static void free_entry(struct writequeue_entry *e)
878{
879 __free_page(e->page);
880 kfree(e);
881}
882
Mike Christie5d689872013-06-14 04:56:13 -0500883/*
884 * writequeue_entry_complete - try to delete and free write queue entry
885 * @e: write queue entry to try to delete
886 * @completed: bytes completed
887 *
888 * writequeue_lock must be held.
889 */
890static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
891{
892 e->offset += completed;
893 e->len -= completed;
894
895 if (e->len == 0 && e->users == 0) {
896 list_del(&e->list);
897 free_entry(e);
898 }
899}
900
/*
 * sctp_bind_addrs - bind a SCTP socket to all our addresses
 *
 * The first local address is attached with an ordinary kernel_bind();
 * every further address is added to the association via the
 * SCTP_SOCKOPT_BINDX_ADD socket option (sctp multi-homing).
 *
 * Returns 0 on success or the first negative error encountered, at
 * which point binding stops.
 */
static int sctp_bind_addrs(struct connection *con, uint16_t port)
{
	struct sockaddr_storage localaddr;
	int i, addr_len, result = 0;

	for (i = 0; i < dlm_local_count; i++) {
		/* Work on a copy: make_sockaddr() writes the port into it. */
		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
		make_sockaddr(&localaddr, port, &addr_len);

		if (!i)
			result = kernel_bind(con->sock,
					     (struct sockaddr *)&localaddr,
					     addr_len);
		else
			result = kernel_setsockopt(con->sock, SOL_SCTP,
						   SCTP_SOCKOPT_BINDX_ADD,
						   (char *)&localaddr, addr_len);

		if (result < 0) {
			log_print("Can't bind to %d addr number %d, %d.\n",
				  port, i + 1, result);
			break;
		}
	}
	return result;
}
930
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100931/* Initiate an SCTP association.
932 This is a special case of send_to_sock() in that we don't yet have a
933 peeled-off socket for this association, so we use the listening socket
934 and add the primary IP address of the remote node.
935 */
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300936static void sctp_connect_to_sock(struct connection *con)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100937{
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300938 struct sockaddr_storage daddr;
939 int one = 1;
940 int result;
941 int addr_len;
942 struct socket *sock;
943
944 if (con->nodeid == 0) {
945 log_print("attempt to connect sock 0 foiled");
946 return;
947 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100948
Mike Christie5d689872013-06-14 04:56:13 -0500949 mutex_lock(&con->sock_mutex);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100950
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300951 /* Some odd races can cause double-connects, ignore them */
952 if (con->retries++ > MAX_CONNECT_RETRIES)
953 goto out;
954
955 if (con->sock) {
956 log_print("node %d already connected.", con->nodeid);
957 goto out;
958 }
959
960 memset(&daddr, 0, sizeof(daddr));
961 result = nodeid_to_addr(con->nodeid, &daddr, NULL, true);
962 if (result < 0) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100963 log_print("no address for nodeid %d", con->nodeid);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300964 goto out;
David Teigland04bedd72009-09-18 14:31:47 -0500965 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100966
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300967 /* Create a socket to communicate with */
968 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
969 SOCK_STREAM, IPPROTO_SCTP, &sock);
970 if (result < 0)
971 goto socket_err;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100972
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300973 sock->sk->sk_user_data = con;
974 con->rx_action = receive_from_sock;
975 con->connect_action = sctp_connect_to_sock;
976 add_sock(sock, con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100977
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300978 /* Bind to all addresses. */
979 if (sctp_bind_addrs(con, 0))
980 goto bind_err;
Mike Christie98e1b602013-06-14 04:56:12 -0500981
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300982 make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100983
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300984 log_print("connecting to %d", con->nodeid);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +0100985
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -0300986 /* Turn off Nagle's algorithm */
987 kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
988 sizeof(one));
989
990 result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
991 O_NONBLOCK);
992 if (result == -EINPROGRESS)
993 result = 0;
994 if (result == 0)
995 goto out;
996
997
998bind_err:
999 con->sock = NULL;
1000 sock_release(sock);
1001
1002socket_err:
1003 /*
1004 * Some errors are fatal and this list might need adjusting. For other
1005 * errors we try again until the max number of retries is reached.
1006 */
1007 if (result != -EHOSTUNREACH &&
1008 result != -ENETUNREACH &&
1009 result != -ENETDOWN &&
1010 result != -EINVAL &&
1011 result != -EPROTONOSUPPORT) {
1012 log_print("connect %d try %d error %d", con->nodeid,
1013 con->retries, result);
1014 mutex_unlock(&con->sock_mutex);
1015 msleep(1000);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001016 clear_bit(CF_CONNECT_PENDING, &con->flags);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001017 lowcomms_connect_sock(con);
1018 return;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001019 }
Mike Christie5d689872013-06-14 04:56:13 -05001020
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001021out:
Mike Christie5d689872013-06-14 04:56:13 -05001022 mutex_unlock(&con->sock_mutex);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001023}
1024
/* Connect a new socket to its peer */
static void tcp_connect_to_sock(struct connection *con)
{
	struct sockaddr_storage saddr, src_addr;
	int addr_len;
	struct socket *sock = NULL;
	int one = 1;
	int result;

	/* nodeid 0 is the listening connection, never a peer. */
	if (con->nodeid == 0) {
		log_print("attempt to connect sock 0 foiled");
		return;
	}

	mutex_lock(&con->sock_mutex);
	if (con->retries++ > MAX_CONNECT_RETRIES)
		goto out;

	/* Some odd races can cause double-connects, ignore them */
	if (con->sock)
		goto out;

	/* Create a socket to communicate with */
	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
				  SOCK_STREAM, IPPROTO_TCP, &sock);
	if (result < 0)
		goto out_err;

	memset(&saddr, 0, sizeof(saddr));
	result = nodeid_to_addr(con->nodeid, &saddr, NULL, false);
	if (result < 0) {
		log_print("no address for nodeid %d", con->nodeid);
		goto out_err;
	}

	/* Hook the socket up to this connection before connecting so the
	 * data-ready callbacks installed by add_sock() find it. */
	sock->sk->sk_user_data = con;
	con->rx_action = receive_from_sock;
	con->connect_action = tcp_connect_to_sock;
	add_sock(sock, con);

	/* Bind to our cluster-known address connecting to avoid
	   routing problems */
	memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
	make_sockaddr(&src_addr, 0, &addr_len);
	result = sock->ops->bind(sock, (struct sockaddr *) &src_addr,
				 addr_len);
	if (result < 0) {
		log_print("could not bind for connect: %d", result);
		/* This *may* not indicate a critical error */
	}

	make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);

	log_print("connecting to %d", con->nodeid);

	/* Turn off Nagle's algorithm */
	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
			  sizeof(one));

	/* Non-blocking connect: -EINPROGRESS means it is underway and is
	 * treated as success. */
	result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
				   O_NONBLOCK);
	if (result == -EINPROGRESS)
		result = 0;
	if (result == 0)
		goto out;

out_err:
	/* add_sock() may have published the socket on con->sock; release
	 * whichever reference we currently hold. */
	if (con->sock) {
		sock_release(con->sock);
		con->sock = NULL;
	} else if (sock) {
		sock_release(sock);
	}
	/*
	 * Some errors are fatal and this list might need adjusting. For other
	 * errors we try again until the max number of retries is reached.
	 */
	if (result != -EHOSTUNREACH &&
	    result != -ENETUNREACH &&
	    result != -ENETDOWN &&
	    result != -EINVAL &&
	    result != -EPROTONOSUPPORT) {
		log_print("connect %d try %d error %d", con->nodeid,
			  con->retries, result);
		mutex_unlock(&con->sock_mutex);
		msleep(1000);
		clear_bit(CF_CONNECT_PENDING, &con->flags);
		/* Requeue for another attempt. */
		lowcomms_connect_sock(con);
		return;
	}
out:
	mutex_unlock(&con->sock_mutex);
	return;
}
1119
/*
 * tcp_create_listen_sock - create, bind and start the TCP listening socket
 * @con: the listening connection (nodeid 0)
 * @saddr: local address to listen on; the configured DLM port is written
 *         into it via make_sockaddr()
 *
 * Returns the listening socket, or NULL on any failure (errors are
 * logged; the result code itself is not propagated to the caller).
 */
static struct socket *tcp_create_listen_sock(struct connection *con,
					     struct sockaddr_storage *saddr)
{
	struct socket *sock = NULL;
	int result = 0;
	int one = 1;
	int addr_len;

	if (dlm_local_addr[0]->ss_family == AF_INET)
		addr_len = sizeof(struct sockaddr_in);
	else
		addr_len = sizeof(struct sockaddr_in6);

	/* Create a socket to communicate with */
	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
				  SOCK_STREAM, IPPROTO_TCP, &sock);
	if (result < 0) {
		log_print("Can't create listening comms socket");
		goto create_out;
	}

	/* Turn off Nagle's algorithm */
	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
			  sizeof(one));

	/* SO_REUSEADDR failure is logged but not fatal. */
	result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
				   (char *)&one, sizeof(one));

	if (result < 0) {
		log_print("Failed to set SO_REUSEADDR on socket: %d", result);
	}
	con->rx_action = tcp_accept_from_sock;
	con->connect_action = tcp_connect_to_sock;

	/* Bind to our port */
	make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
	result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
	if (result < 0) {
		log_print("Can't bind to port %d", dlm_config.ci_tcp_port);
		sock_release(sock);
		sock = NULL;
		con->sock = NULL;
		goto create_out;
	}
	/* Keepalive failure is logged but not fatal. */
	result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
				   (char *)&one, sizeof(one));
	if (result < 0) {
		log_print("Set keepalive failed: %d", result);
	}

	result = sock->ops->listen(sock, 5);
	if (result < 0) {
		log_print("Can't listen on port %d", dlm_config.ci_tcp_port);
		sock_release(sock);
		sock = NULL;
		goto create_out;
	}

create_out:
	return sock;
}
1181
1182/* Get local addresses */
1183static void init_local(void)
1184{
1185 struct sockaddr_storage sas, *addr;
1186 int i;
1187
Patrick Caulfield30d3a232007-04-23 16:26:21 +01001188 dlm_local_count = 0;
David Teigland1b189b82012-03-21 09:18:34 -05001189 for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001190 if (dlm_our_addr(&sas, i))
1191 break;
1192
David Teigland573c24c2009-11-30 16:34:43 -06001193 addr = kmalloc(sizeof(*addr), GFP_NOFS);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001194 if (!addr)
1195 break;
1196 memcpy(addr, &sas, sizeof(*addr));
1197 dlm_local_addr[dlm_local_count++] = addr;
1198 }
1199}
1200
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001201/* Initialise SCTP socket and bind to all interfaces */
1202static int sctp_listen_for_all(void)
1203{
1204 struct socket *sock = NULL;
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001205 int result = -EINVAL;
David Teigland573c24c2009-11-30 16:34:43 -06001206 struct connection *con = nodeid2con(0, GFP_NOFS);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001207 int bufsize = NEEDED_RMEM;
Mike Christie86e92ad2013-06-14 04:56:14 -05001208 int one = 1;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001209
1210 if (!con)
1211 return -ENOMEM;
1212
1213 log_print("Using SCTP for communications");
1214
Eric W. Biedermaneeb1bd52015-05-08 21:08:05 -05001215 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001216 SOCK_STREAM, IPPROTO_SCTP, &sock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001217 if (result < 0) {
1218 log_print("Can't create comms socket, check SCTP is loaded");
1219 goto out;
1220 }
1221
David S. Millerdf61c952007-11-06 23:48:57 -08001222 result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001223 (char *)&bufsize, sizeof(bufsize));
1224 if (result)
David Teigland617e82e2007-04-26 13:46:49 -05001225 log_print("Error increasing buffer space on socket %d", result);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001226
Mike Christie86e92ad2013-06-14 04:56:14 -05001227 result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
1228 sizeof(one));
1229 if (result < 0)
1230 log_print("Could not set SCTP NODELAY error %d\n", result);
1231
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001232 /* Init con struct */
1233 sock->sk->sk_user_data = con;
1234 con->sock = sock;
1235 con->sock->sk->sk_data_ready = lowcomms_data_ready;
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001236 con->rx_action = sctp_accept_from_sock;
1237 con->connect_action = sctp_connect_to_sock;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001238
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001239 /* Bind to all addresses. */
1240 if (sctp_bind_addrs(con, dlm_config.ci_tcp_port))
1241 goto create_delsock;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001242
1243 result = sock->ops->listen(sock, 5);
1244 if (result < 0) {
1245 log_print("Can't set socket listening");
1246 goto create_delsock;
1247 }
1248
1249 return 0;
1250
1251create_delsock:
1252 sock_release(sock);
1253 con->sock = NULL;
1254out:
1255 return result;
1256}
1257
1258static int tcp_listen_for_all(void)
1259{
1260 struct socket *sock = NULL;
David Teigland573c24c2009-11-30 16:34:43 -06001261 struct connection *con = nodeid2con(0, GFP_NOFS);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001262 int result = -EINVAL;
1263
1264 if (!con)
1265 return -ENOMEM;
1266
1267 /* We don't support multi-homed hosts */
1268 if (dlm_local_addr[1] != NULL) {
David Teigland617e82e2007-04-26 13:46:49 -05001269 log_print("TCP protocol can't handle multi-homed hosts, "
1270 "try SCTP");
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001271 return -EINVAL;
1272 }
1273
1274 log_print("Using TCP for communications");
1275
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001276 sock = tcp_create_listen_sock(con, dlm_local_addr[0]);
1277 if (sock) {
1278 add_sock(sock, con);
1279 result = 0;
1280 }
1281 else {
1282 result = -EADDRINUSE;
1283 }
1284
1285 return result;
1286}
1287
1288
1289
1290static struct writequeue_entry *new_writequeue_entry(struct connection *con,
1291 gfp_t allocation)
1292{
1293 struct writequeue_entry *entry;
1294
1295 entry = kmalloc(sizeof(struct writequeue_entry), allocation);
1296 if (!entry)
1297 return NULL;
1298
1299 entry->page = alloc_page(allocation);
1300 if (!entry->page) {
1301 kfree(entry);
1302 return NULL;
1303 }
1304
1305 entry->offset = 0;
1306 entry->len = 0;
1307 entry->end = 0;
1308 entry->users = 0;
1309 entry->con = con;
1310
1311 return entry;
1312}
1313
/*
 * dlm_lowcomms_get_buffer - reserve @len bytes of outgoing message space
 * @nodeid: destination node
 * @len: bytes required (must fit in one page)
 * @allocation: gfp flags for any allocation needed
 * @ppc: out parameter; set to the caller-writable buffer
 *
 * Tries to tack the reservation onto the tail entry of the connection's
 * write queue; if there is no room, a fresh page-backed entry is
 * appended.  The entry's user count is bumped and must be dropped via
 * dlm_lowcomms_commit_buffer().  Returns an opaque handle (the entry)
 * or NULL on allocation failure.
 */
void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
{
	struct connection *con;
	struct writequeue_entry *e;
	int offset = 0;

	con = nodeid2con(nodeid, allocation);
	if (!con)
		return NULL;

	spin_lock(&con->writequeue_lock);
	/* Look at the last queue entry and see if @len still fits in its
	 * page; e becomes NULL when the queue is empty or there is no room. */
	e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
	if ((&e->list == &con->writequeue) ||
	    (PAGE_CACHE_SIZE - e->end < len)) {
		e = NULL;
	} else {
		offset = e->end;
		e->end += len;
		e->users++;
	}
	spin_unlock(&con->writequeue_lock);

	if (e) {
	got_one:
		*ppc = page_address(e->page) + offset;
		return e;
	}

	/* No room in the tail entry: append a fresh one and reserve there. */
	e = new_writequeue_entry(con, allocation);
	if (e) {
		spin_lock(&con->writequeue_lock);
		offset = e->end;
		e->end += len;
		e->users++;
		list_add_tail(&e->list, &con->writequeue);
		spin_unlock(&con->writequeue_lock);
		goto got_one;
	}
	return NULL;
}
1354
1355void dlm_lowcomms_commit_buffer(void *mh)
1356{
1357 struct writequeue_entry *e = (struct writequeue_entry *)mh;
1358 struct connection *con = e->con;
1359 int users;
1360
1361 spin_lock(&con->writequeue_lock);
1362 users = --e->users;
1363 if (users)
1364 goto out;
1365 e->len = e->end - e->offset;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001366 spin_unlock(&con->writequeue_lock);
1367
1368 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
1369 queue_work(send_workqueue, &con->swork);
1370 }
1371 return;
1372
1373out:
1374 spin_unlock(&con->writequeue_lock);
1375 return;
1376}
1377
/* Send a message */
/*
 * Drain the connection's write queue onto its socket.  Runs from the
 * send workqueue with con->sock_mutex held for the duration.  On a
 * full socket (-EAGAIN) we back off and let the write-space callback
 * requeue us; on a real error the connection is closed and reconnected.
 */
static void send_to_sock(struct connection *con)
{
	int ret = 0;
	const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
	struct writequeue_entry *e;
	int len, offset;
	int count = 0;

	mutex_lock(&con->sock_mutex);
	if (con->sock == NULL)
		goto out_connect;

	spin_lock(&con->writequeue_lock);
	for (;;) {
		e = list_entry(con->writequeue.next, struct writequeue_entry,
			       list);
		/* Queue empty: the head's list_entry points back at itself. */
		if ((struct list_head *) e == &con->writequeue)
			break;

		len = e->len;
		offset = e->offset;
		BUG_ON(len == 0 && e->users == 0);
		/* Drop the lock across the (potentially sleeping) send. */
		spin_unlock(&con->writequeue_lock);

		ret = 0;
		if (len) {
			ret = kernel_sendpage(con->sock, e->page, offset, len,
					      msg_flags);
			if (ret == -EAGAIN || ret == 0) {
				if (ret == -EAGAIN &&
				    test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) &&
				    !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
					/* Notify TCP that we're limited by the
					 * application window size.
					 */
					set_bit(SOCK_NOSPACE, &con->sock->flags);
					con->sock->sk->sk_write_pending++;
				}
				cond_resched();
				goto out;
			} else if (ret < 0)
				goto send_error;
		}

		/* Don't starve people filling buffers */
		if (++count >= MAX_SEND_MSG_COUNT) {
			cond_resched();
			count = 0;
		}

		spin_lock(&con->writequeue_lock);
		writequeue_entry_complete(e, ret);
	}
	spin_unlock(&con->writequeue_lock);
out:
	mutex_unlock(&con->sock_mutex);
	return;

send_error:
	/* Hard send failure: tear the socket down and reconnect. */
	mutex_unlock(&con->sock_mutex);
	close_connection(con, false, false, true);
	lowcomms_connect_sock(con);
	return;

out_connect:
	/* No socket yet: kick off a connect instead of sending. */
	mutex_unlock(&con->sock_mutex);
	lowcomms_connect_sock(con);
}
1447
1448static void clean_one_writequeue(struct connection *con)
1449{
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001450 struct writequeue_entry *e, *safe;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001451
1452 spin_lock(&con->writequeue_lock);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001453 list_for_each_entry_safe(e, safe, &con->writequeue, list) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001454 list_del(&e->list);
1455 free_entry(e);
1456 }
1457 spin_unlock(&con->writequeue_lock);
1458}
1459
1460/* Called from recovery when it knows that a node has
1461 left the cluster */
1462int dlm_lowcomms_close(int nodeid)
1463{
1464 struct connection *con;
David Teigland36b71a82012-07-26 12:44:30 -05001465 struct dlm_node_addr *na;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001466
1467 log_print("closing connection to node %d", nodeid);
1468 con = nodeid2con(nodeid, 0);
1469 if (con) {
Lars Marowsky-Bree063c4c92009-08-11 16:18:23 -05001470 set_bit(CF_CLOSE, &con->flags);
Marcelo Ricardo Leitner0d737a82015-08-11 19:22:21 -03001471 close_connection(con, true, true, true);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001472 clean_one_writequeue(con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001473 }
David Teigland36b71a82012-07-26 12:44:30 -05001474
1475 spin_lock(&dlm_node_addrs_spin);
1476 na = find_node_addr(nodeid);
1477 if (na) {
1478 list_del(&na->list);
1479 while (na->addr_count--)
1480 kfree(na->addr[na->addr_count]);
1481 kfree(na);
1482 }
1483 spin_unlock(&dlm_node_addrs_spin);
1484
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001485 return 0;
1486}
1487
1488/* Receive workqueue function */
1489static void process_recv_sockets(struct work_struct *work)
1490{
1491 struct connection *con = container_of(work, struct connection, rwork);
1492 int err;
1493
1494 clear_bit(CF_READ_PENDING, &con->flags);
1495 do {
1496 err = con->rx_action(con);
1497 } while (!err);
1498}
1499
/* Send workqueue function */
static void process_send_sockets(struct work_struct *work)
{
	struct connection *con = container_of(work, struct connection, swork);

	/* A pending connect implies pending data: run the connect, then
	 * mark the connection writable so the branch below sends. */
	if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
		con->connect_action(con);
		set_bit(CF_WRITE_PENDING, &con->flags);
	}
	if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags))
		send_to_sock(con);
}
1512
1513
/* Discard all entries on the write queues */
/* Applies clean_one_writequeue() to every connection in the hash. */
static void clean_writequeues(void)
{
	foreach_conn(clean_one_writequeue);
}
1519
/* Flush and destroy both comms workqueues (receive first, then send). */
static void work_stop(void)
{
	destroy_workqueue(recv_workqueue);
	destroy_workqueue(send_workqueue);
}
1525
1526static int work_start(void)
1527{
David Teiglande43f0552011-03-10 13:22:34 -06001528 recv_workqueue = alloc_workqueue("dlm_recv",
1529 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001530 if (!recv_workqueue) {
1531 log_print("can't start dlm_recv");
1532 return -ENOMEM;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001533 }
1534
David Teiglande43f0552011-03-10 13:22:34 -06001535 send_workqueue = alloc_workqueue("dlm_send",
1536 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001537 if (!send_workqueue) {
1538 log_print("can't start dlm_send");
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001539 destroy_workqueue(recv_workqueue);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001540 return -ENOMEM;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001541 }
1542
1543 return 0;
1544}
1545
/*
 * Quiesce one connection during shutdown: set the low four connection
 * flag bits at once (0x0F appears to cover the CF_*_PENDING/close-type
 * flags - NOTE(review): confirm against the CF_* definitions) and
 * detach the socket's user data so callbacks stop referencing us.
 */
static void stop_conn(struct connection *con)
{
	con->flags |= 0x0F;
	if (con->sock && con->sock->sk)
		con->sock->sk->sk_user_data = NULL;
}
1552
/*
 * Final teardown of one connection: close its socket, free the
 * piggy-backed "othercon" (incoming-duplicate) connection if present,
 * unhash, and free the connection itself.  Order matters: the socket
 * must be closed before the structures are freed.
 */
static void free_conn(struct connection *con)
{
	close_connection(con, true, true, true);
	if (con->othercon)
		kmem_cache_free(con_cache, con->othercon);
	hlist_del(&con->list);
	kmem_cache_free(con_cache, con);
}
1561
/*
 * Shut the comms layer down: quiesce every connection, stop the
 * workqueues, then free all queued data and connection structures.
 */
void dlm_lowcomms_stop(void)
{
	/* Set all the flags to prevent any
	   socket activity.
	*/
	mutex_lock(&connections_lock);
	dlm_allow_conn = 0;
	foreach_conn(stop_conn);
	mutex_unlock(&connections_lock);

	/* Must run without connections_lock: workers may take it. */
	work_stop();

	mutex_lock(&connections_lock);
	clean_writequeues();

	foreach_conn(free_conn);

	mutex_unlock(&connections_lock);
	kmem_cache_destroy(con_cache);
}
1582
/*
 * Bring the comms layer up: discover local addresses, create the
 * connection cache and workqueues, then start listening with the
 * configured protocol (0 = TCP, otherwise SCTP).
 *
 * Returns 0 on success or a negative errno, undoing everything done
 * so far on failure.
 */
int dlm_lowcomms_start(void)
{
	int error = -EINVAL;
	struct connection *con;
	int i;

	for (i = 0; i < CONN_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&connection_hash[i]);

	init_local();
	if (!dlm_local_count) {
		error = -ENOTCONN;
		log_print("no local IP address has been set");
		goto fail;
	}

	error = -ENOMEM;
	con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
				      __alignof__(struct connection), 0,
				      NULL);
	if (!con_cache)
		goto fail;

	error = work_start();
	if (error)
		goto fail_destroy;

	dlm_allow_conn = 1;

	/* Start listening */
	if (dlm_config.ci_protocol == 0)
		error = tcp_listen_for_all();
	else
		error = sctp_listen_for_all();
	if (error)
		goto fail_unlisten;

	return 0;

fail_unlisten:
	/* Listening failed: tear down the (nodeid 0) listen connection. */
	dlm_allow_conn = 0;
	con = nodeid2con(0,0);
	if (con) {
		close_connection(con, false, true, true);
		kmem_cache_free(con_cache, con);
	}
fail_destroy:
	kmem_cache_destroy(con_cache);
fail:
	return error;
}
David Teigland36b71a82012-07-26 12:44:30 -05001634
1635void dlm_lowcomms_exit(void)
1636{
1637 struct dlm_node_addr *na, *safe;
1638
1639 spin_lock(&dlm_node_addrs_spin);
1640 list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) {
1641 list_del(&na->list);
1642 while (na->addr_count--)
1643 kfree(na->addr[na->addr_count]);
1644 kfree(na);
1645 }
1646 spin_unlock(&dlm_node_addrs_spin);
1647}