// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
**
**
*******************************************************************************
******************************************************************************/

/*
 * lowcomms.c
 *
 * This is the "low-level" comms layer.
 *
 * It is responsible for sending/receiving messages
 * from other nodes in the cluster.
 *
 * Cluster nodes are referred to by their nodeids. nodeids are
 * simply 32 bit numbers to the locking module - if they need to
 * be expanded for the cluster infrastructure then that is its
 * responsibility. It is this layer's
 * responsibility to resolve these into IP addresses or
 * whatever it needs for inter-node communication.
 *
 * The comms level is two kernel threads that deal mainly with
 * the receiving of messages from other nodes and passing them
 * up to the mid-level comms layer (which understands the
 * message format) for execution by the locking core, and
 * a send thread which does all the setting up of connections
 * to remote nodes and the sending of data. Threads are not allowed
 * to send their own data because it may cause them to wait in times
 * of high load. Also, this way, the sending thread can collect together
 * messages bound for one node and send them in one block.
 *
 * lowcomms will choose to use either TCP or SCTP as its transport layer
 * depending on the configuration variable 'protocol'. This should be set
 * to 0 (default) for TCP or 1 for SCTP. It should be configured using a
 * cluster-wide mechanism as it must be the same on all nodes of the cluster
 * for the DLM to function.
 *
 */

#include <asm/ioctls.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/sctp.h>
#include <linux/slab.h>
#include <net/sctp/sctp.h>
#include <net/ipv6.h>

#include "dlm_internal.h"
#include "lowcomms.h"
#include "midcomms.h"
#include "config.h"

#define NEEDED_RMEM (4*1024*1024)

/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000)

struct connection {
	struct socket *sock;	/* NULL if not connected */
	uint32_t nodeid;	/* So we know who we are in the list */
	struct mutex sock_mutex;
	unsigned long flags;
#define CF_READ_PENDING 1
#define CF_WRITE_PENDING 2
#define CF_INIT_PENDING 4
#define CF_IS_OTHERCON 5
#define CF_CLOSE 6
#define CF_APP_LIMITED 7
#define CF_CLOSING 8
#define CF_SHUTDOWN 9
#define CF_CONNECTED 10
#define CF_RECONNECT 11
#define CF_DELAY_CONNECT 12
#define CF_EOF 13
	struct list_head writequeue;  /* List of outgoing writequeue_entries */
	spinlock_t writequeue_lock;
	atomic_t writequeue_cnt;
	void (*connect_action) (struct connection *);	/* What to do to connect */
	void (*shutdown_action)(struct connection *con); /* What to do to shutdown */
	bool (*eof_condition)(struct connection *con); /* What to do to check for EOF */
	int retries;
#define MAX_CONNECT_RETRIES 3
	struct hlist_node list;
	struct connection *othercon;
	struct connection *sendcon;
	struct work_struct rwork; /* Receive workqueue */
	struct work_struct swork; /* Send workqueue */
	wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
	unsigned char *rx_buf;
	int rx_buflen;
	int rx_leftover;
	struct rcu_head rcu;
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)

struct listen_connection {
	struct socket *sock;
	struct work_struct rwork;
};

#define DLM_WQ_REMAIN_BYTES(e) (PAGE_SIZE - e->end)
#define DLM_WQ_LENGTH_BYTES(e) (e->end - e->offset)

/* An entry waiting to be sent */
struct writequeue_entry {
	struct list_head list;
	struct page *page;
	int offset;
	int len;
	int end;
	int users;
	bool dirty;
	struct connection *con;
	struct list_head msgs;
	struct kref ref;
};

struct dlm_msg {
	struct writequeue_entry *entry;
	struct dlm_msg *orig_msg;
	bool retransmit;
	void *ppc;
	int len;
	int idx; /* new()/commit() idx exchange */

	struct list_head list;
	struct kref ref;
};

struct dlm_node_addr {
	struct list_head list;
	int nodeid;
	int mark;
	int addr_count;
	int curr_addr_index;
	struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
};

static struct listen_sock_callbacks {
	void (*sk_error_report)(struct sock *);
	void (*sk_data_ready)(struct sock *);
	void (*sk_state_change)(struct sock *);
	void (*sk_write_space)(struct sock *);
} listen_sock;

static LIST_HEAD(dlm_node_addrs);
static DEFINE_SPINLOCK(dlm_node_addrs_spin);

static struct listen_connection listen_con;
static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
static int dlm_local_count;
int dlm_allow_conn;

/* Work queues */
static struct workqueue_struct *recv_workqueue;
static struct workqueue_struct *send_workqueue;

static struct hlist_head connection_hash[CONN_HASH_SIZE];
static DEFINE_SPINLOCK(connections_lock);
DEFINE_STATIC_SRCU(connections_srcu);

static void process_recv_sockets(struct work_struct *work);
static void process_send_sockets(struct work_struct *work);

static void sctp_connect_to_sock(struct connection *con);
static void tcp_connect_to_sock(struct connection *con);
static void dlm_tcp_shutdown(struct connection *con);

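/* Look up the connection for @nodeid in hash bucket @r; callers must hold
 * either the connections_srcu read lock or connections_lock.
 */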
static struct connection *__find_con(int nodeid, int r)
{
	struct connection *con;

	hlist_for_each_entry_rcu(con, &connection_hash[r], list) {
		if (con->nodeid == nodeid)
			return con;
	}

	return NULL;
}

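/* Returns true while writequeue entries are still pending, so that on a TCP
 * EOF the final close can be deferred until all queued data has been sent.
 */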
static bool tcp_eof_condition(struct connection *con)
{
	return atomic_read(&con->writequeue_cnt);
}

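/* Initialise a freshly allocated connection: receive buffer, locks, work
 * items and the protocol-specific connect/shutdown/EOF callbacks.
 */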
static int dlm_con_init(struct connection *con, int nodeid)
{
	con->rx_buflen = dlm_config.ci_buffer_size;
	con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS);
	if (!con->rx_buf)
		return -ENOMEM;

	con->nodeid = nodeid;
	mutex_init(&con->sock_mutex);
	INIT_LIST_HEAD(&con->writequeue);
	spin_lock_init(&con->writequeue_lock);
	atomic_set(&con->writequeue_cnt, 0);
	INIT_WORK(&con->swork, process_send_sockets);
	INIT_WORK(&con->rwork, process_recv_sockets);
	init_waitqueue_head(&con->shutdown_wait);

	switch (dlm_config.ci_protocol) {
	case DLM_PROTO_TCP:
		con->connect_action = tcp_connect_to_sock;
		con->shutdown_action = dlm_tcp_shutdown;
		con->eof_condition = tcp_eof_condition;
		break;
	case DLM_PROTO_SCTP:
		con->connect_action = sctp_connect_to_sock;
		break;
	default:
		kfree(con->rx_buf);
		return -EINVAL;
	}

	return 0;
}

/*
 * If 'alloc' is zero then we don't attempt to create a new
 * connection structure for this node.
 */
static struct connection *nodeid2con(int nodeid, gfp_t alloc)
{
	struct connection *con, *tmp;
	int r, ret;

	r = nodeid_hash(nodeid);
	con = __find_con(nodeid, r);
	if (con || !alloc)
		return con;

	con = kzalloc(sizeof(*con), alloc);
	if (!con)
		return NULL;

	ret = dlm_con_init(con, nodeid);
	if (ret) {
		kfree(con);
		return NULL;
	}

	spin_lock(&connections_lock);
	/* Because multiple workqueues/threads call this function it can
	 * race on multiple CPUs. Instead of locking the hot path
	 * __find_con() we just recheck recently added nodes again under
	 * protection of connections_lock. If an entry appeared we abort
	 * our connection creation and return the existing connection.
	 */
	tmp = __find_con(nodeid, r);
	if (tmp) {
		spin_unlock(&connections_lock);
		kfree(con->rx_buf);
		kfree(con);
		return tmp;
	}

	hlist_add_head_rcu(&con->list, &connection_hash[r]);
	spin_unlock(&connections_lock);

	return con;
}

/* Loop round all connections */
static void foreach_conn(void (*conn_func)(struct connection *c))
{
	int i;
	struct connection *con;

	for (i = 0; i < CONN_HASH_SIZE; i++) {
		hlist_for_each_entry_rcu(con, &connection_hash[i], list)
			conn_func(con);
	}
}

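/* caller must hold dlm_node_addrs_spin */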
static struct dlm_node_addr *find_node_addr(int nodeid)
{
	struct dlm_node_addr *na;

	list_for_each_entry(na, &dlm_node_addrs, list) {
		if (na->nodeid == nodeid)
			return na;
	}
	return NULL;
}

static int addr_compare(const struct sockaddr_storage *x,
			const struct sockaddr_storage *y)
{
	switch (x->ss_family) {
	case AF_INET: {
		struct sockaddr_in *sinx = (struct sockaddr_in *)x;
		struct sockaddr_in *siny = (struct sockaddr_in *)y;
		if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
			return 0;
		if (sinx->sin_port != siny->sin_port)
			return 0;
		break;
	}
	case AF_INET6: {
		struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
		struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
		if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
			return 0;
		if (sinx->sin6_port != siny->sin6_port)
			return 0;
		break;
	}
	default:
		return 0;
	}
	return 1;
}

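/* Resolve a nodeid to one of its configured addresses; with try_new_addr the
 * current address index is advanced round-robin for the next attempt.
 */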
static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
			  struct sockaddr *sa_out, bool try_new_addr,
			  unsigned int *mark)
{
	struct sockaddr_storage sas;
	struct dlm_node_addr *na;

	if (!dlm_local_count)
		return -1;

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (na && na->addr_count) {
		memcpy(&sas, na->addr[na->curr_addr_index],
		       sizeof(struct sockaddr_storage));

		if (try_new_addr) {
			na->curr_addr_index++;
			if (na->curr_addr_index == na->addr_count)
				na->curr_addr_index = 0;
		}
	}
	spin_unlock(&dlm_node_addrs_spin);

	if (!na)
		return -EEXIST;

	if (!na->addr_count)
		return -ENOENT;

	*mark = na->mark;

	if (sas_out)
		memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));

	if (!sa_out)
		return 0;

	if (dlm_local_addr[0]->ss_family == AF_INET) {
		struct sockaddr_in *in4 = (struct sockaddr_in *) &sas;
		struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
	} else {
		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas;
		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out;
		ret6->sin6_addr = in6->sin6_addr;
	}

	return 0;
}

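/* Reverse lookup: find the nodeid owning the given peer address */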
static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid,
			  unsigned int *mark)
{
	struct dlm_node_addr *na;
	int rv = -EEXIST;
	int addr_i;

	spin_lock(&dlm_node_addrs_spin);
	list_for_each_entry(na, &dlm_node_addrs, list) {
		if (!na->addr_count)
			continue;

		for (addr_i = 0; addr_i < na->addr_count; addr_i++) {
			if (addr_compare(na->addr[addr_i], addr)) {
				*nodeid = na->nodeid;
				*mark = na->mark;
				rv = 0;
				goto unlock;
			}
		}
	}
unlock:
	spin_unlock(&dlm_node_addrs_spin);
	return rv;
}

/* caller needs to hold the dlm_node_addrs_spin lock */
static bool dlm_lowcomms_na_has_addr(const struct dlm_node_addr *na,
				     const struct sockaddr_storage *addr)
{
	int i;

	for (i = 0; i < na->addr_count; i++) {
		if (addr_compare(na->addr[i], addr))
			return true;
	}

	return false;
}

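/* Register an additional address for a node, creating the entry on first
 * use; a duplicate address returns -EEXIST and at most DLM_MAX_ADDR_COUNT
 * addresses are kept per node.
 */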
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
{
	struct sockaddr_storage *new_addr;
	struct dlm_node_addr *new_node, *na;
	bool ret;

	new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS);
	if (!new_node)
		return -ENOMEM;

	new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS);
	if (!new_addr) {
		kfree(new_node);
		return -ENOMEM;
	}

	memcpy(new_addr, addr, len);

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (!na) {
		new_node->nodeid = nodeid;
		new_node->addr[0] = new_addr;
		new_node->addr_count = 1;
		new_node->mark = dlm_config.ci_mark;
		list_add(&new_node->list, &dlm_node_addrs);
		spin_unlock(&dlm_node_addrs_spin);
		return 0;
	}

	ret = dlm_lowcomms_na_has_addr(na, addr);
	if (ret) {
		spin_unlock(&dlm_node_addrs_spin);
		kfree(new_addr);
		kfree(new_node);
		return -EEXIST;
	}

	if (na->addr_count >= DLM_MAX_ADDR_COUNT) {
		spin_unlock(&dlm_node_addrs_spin);
		kfree(new_addr);
		kfree(new_node);
		return -ENOSPC;
	}

	na->addr[na->addr_count++] = new_addr;
	spin_unlock(&dlm_node_addrs_spin);
	kfree(new_node);
	return 0;
}

/* Data available on socket or listen socket received a connect */
static void lowcomms_data_ready(struct sock *sk)
{
	struct connection *con;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
		queue_work(recv_workqueue, &con->rwork);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void lowcomms_listen_data_ready(struct sock *sk)
{
	if (!dlm_allow_conn)
		return;

	queue_work(recv_workqueue, &listen_con.rwork);
}

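/* Socket has buffer space again: kick the send worker and clear any
 * application-limited state recorded by the send path.
 */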
static void lowcomms_write_space(struct sock *sk)
{
	struct connection *con;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (!con)
		goto out;

	if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
		log_print("successfully connected to node %d", con->nodeid);
		queue_work(send_workqueue, &con->swork);
		goto out;
	}

	clear_bit(SOCK_NOSPACE, &con->sock->flags);

	if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
		con->sock->sk->sk_write_pending--;
		clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags);
	}

	queue_work(send_workqueue, &con->swork);
out:
	read_unlock_bh(&sk->sk_callback_lock);
}

static inline void lowcomms_connect_sock(struct connection *con)
{
	if (test_bit(CF_CLOSE, &con->flags))
		return;
	queue_work(send_workqueue, &con->swork);
	cond_resched();
}

static void lowcomms_state_change(struct sock *sk)
{
	/* SCTP layer is not calling sk_data_ready when the connection
	 * is done, so we catch the signal through here. Also, it
	 * doesn't switch socket state when entering shutdown, so we
	 * skip the write in that case.
	 */
	if (sk->sk_shutdown) {
		if (sk->sk_shutdown == RCV_SHUTDOWN)
			lowcomms_data_ready(sk);
	} else if (sk->sk_state == TCP_ESTABLISHED) {
		lowcomms_write_space(sk);
	}
}

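/* Queue a connect attempt to the given node unless it is our own nodeid */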
int dlm_lowcomms_connect_node(int nodeid)
{
	struct connection *con;
	int idx;

	if (nodeid == dlm_our_nodeid())
		return 0;

	idx = srcu_read_lock(&connections_srcu);
	con = nodeid2con(nodeid, GFP_NOFS);
	if (!con) {
		srcu_read_unlock(&connections_srcu, idx);
		return -ENOMEM;
	}

	lowcomms_connect_sock(con);
	srcu_read_unlock(&connections_srcu, idx);

	return 0;
}

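/* Update the socket mark used for future connections to this node */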
int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
{
	struct dlm_node_addr *na;

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (!na) {
		spin_unlock(&dlm_node_addrs_spin);
		return -ENOENT;
	}

	na->mark = mark;
	spin_unlock(&dlm_node_addrs_spin);

	return 0;
}

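/* Log a socket error and schedule a reconnect on the sending connection */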
static void lowcomms_error_report(struct sock *sk)
{
	struct connection *con;
	struct sockaddr_storage saddr;
	void (*orig_report)(struct sock *) = NULL;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (con == NULL)
		goto out;

	orig_report = listen_sock.sk_error_report;
	if (con->sock == NULL ||
	    kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
				   "sending to node %d, port %d, "
				   "sk_err=%d/%d\n", dlm_our_nodeid(),
				   con->nodeid, dlm_config.ci_tcp_port,
				   sk->sk_err, sk->sk_err_soft);
	} else if (saddr.ss_family == AF_INET) {
		struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;

		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
				   "sending to node %d at %pI4, port %d, "
				   "sk_err=%d/%d\n", dlm_our_nodeid(),
				   con->nodeid, &sin4->sin_addr.s_addr,
				   dlm_config.ci_tcp_port, sk->sk_err,
				   sk->sk_err_soft);
	} else {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;

		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
				   "sending to node %d at %u.%u.%u.%u, "
				   "port %d, sk_err=%d/%d\n", dlm_our_nodeid(),
				   con->nodeid, sin6->sin6_addr.s6_addr32[0],
				   sin6->sin6_addr.s6_addr32[1],
				   sin6->sin6_addr.s6_addr32[2],
				   sin6->sin6_addr.s6_addr32[3],
				   dlm_config.ci_tcp_port, sk->sk_err,
				   sk->sk_err_soft);
	}

	/* from here on we handle only the sendcon */
	if (test_bit(CF_IS_OTHERCON, &con->flags))
		con = con->sendcon;

	switch (sk->sk_err) {
	case ECONNREFUSED:
		set_bit(CF_DELAY_CONNECT, &con->flags);
		break;
	default:
		break;
	}

	if (!test_and_set_bit(CF_RECONNECT, &con->flags))
		queue_work(send_workqueue, &con->swork);

out:
	read_unlock_bh(&sk->sk_callback_lock);
	if (orig_report)
		orig_report(sk);
}

/* Note: sk_callback_lock must be locked before calling this function. */
static void save_listen_callbacks(struct socket *sock)
{
	struct sock *sk = sock->sk;

	listen_sock.sk_data_ready = sk->sk_data_ready;
	listen_sock.sk_state_change = sk->sk_state_change;
	listen_sock.sk_write_space = sk->sk_write_space;
	listen_sock.sk_error_report = sk->sk_error_report;
}

static void restore_callbacks(struct socket *sock)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = NULL;
	sk->sk_data_ready = listen_sock.sk_data_ready;
	sk->sk_state_change = listen_sock.sk_state_change;
	sk->sk_write_space = listen_sock.sk_write_space;
	sk->sk_error_report = listen_sock.sk_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

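/* Take over the listening socket's callbacks, saving the originals so they
 * can be restored when the socket is closed.
 */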
static void add_listen_sock(struct socket *sock, struct listen_connection *con)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	save_listen_callbacks(sock);
	con->sock = sock;

	sk->sk_user_data = con;
	sk->sk_allocation = GFP_NOFS;
	/* Install a data_ready callback */
	sk->sk_data_ready = lowcomms_listen_data_ready;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Make a socket active */
static void add_sock(struct socket *sock, struct connection *con)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	con->sock = sock;

	sk->sk_user_data = con;
	/* Install a data_ready callback */
	sk->sk_data_ready = lowcomms_data_ready;
	sk->sk_write_space = lowcomms_write_space;
	sk->sk_state_change = lowcomms_state_change;
	sk->sk_allocation = GFP_NOFS;
	sk->sk_error_report = lowcomms_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Add the port number to an IPv6 or IPv4 sockaddr and return the address
   length */
static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
			  int *addr_len)
{
	saddr->ss_family = dlm_local_addr[0]->ss_family;
	if (saddr->ss_family == AF_INET) {
		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
		in4_addr->sin_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in);
		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
	} else {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
		in6_addr->sin6_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in6);
	}
	memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len);
}

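/* kref release functions: a writequeue entry pins its page, and each
 * dlm_msg pins the entry it was written into.
 */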
static void dlm_page_release(struct kref *kref)
{
	struct writequeue_entry *e = container_of(kref, struct writequeue_entry,
						  ref);

	__free_page(e->page);
	kfree(e);
}

static void dlm_msg_release(struct kref *kref)
{
	struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);

	kref_put(&msg->entry->ref, dlm_page_release);
	kfree(msg);
}

static void free_entry(struct writequeue_entry *e)
{
	struct dlm_msg *msg, *tmp;

	list_for_each_entry_safe(msg, tmp, &e->msgs, list) {
		if (msg->orig_msg) {
			msg->orig_msg->retransmit = false;
			kref_put(&msg->orig_msg->ref, dlm_msg_release);
		}

		list_del(&msg->list);
		kref_put(&msg->ref, dlm_msg_release);
	}

	list_del(&e->list);
	atomic_dec(&e->con->writequeue_cnt);
	kref_put(&e->ref, dlm_page_release);
}

static void dlm_close_sock(struct socket **sock)
{
	if (*sock) {
		restore_callbacks(*sock);
		sock_release(*sock);
		*sock = NULL;
	}
}

/* Close a remote connection and tidy up */
static void close_connection(struct connection *con, bool and_other,
			     bool tx, bool rx)
{
	bool closing = test_and_set_bit(CF_CLOSING, &con->flags);
	struct writequeue_entry *e;

	if (tx && !closing && cancel_work_sync(&con->swork)) {
		log_print("canceled swork for node %d", con->nodeid);
		clear_bit(CF_WRITE_PENDING, &con->flags);
	}
	if (rx && !closing && cancel_work_sync(&con->rwork)) {
		log_print("canceled rwork for node %d", con->nodeid);
		clear_bit(CF_READ_PENDING, &con->flags);
	}

	mutex_lock(&con->sock_mutex);
	dlm_close_sock(&con->sock);

	if (con->othercon && and_other) {
		/* Will only re-enter once. */
		close_connection(con->othercon, false, tx, rx);
	}

	/* If we sent a writequeue entry only partially, we must drop the
	 * whole entry on reconnect: we must not resume in the middle of a
	 * message, which would confuse the other end.
	 *
	 * We can always drop whole messages because they get retransmitted,
	 * but we can never allow half a message to be transmitted, as it
	 * may be processed at the other side.
	 *
	 * Our policy is to start from a clean state on disconnect; we don't
	 * know what was sent/received at the transport layer in this case.
	 */
	spin_lock(&con->writequeue_lock);
	if (!list_empty(&con->writequeue)) {
		e = list_first_entry(&con->writequeue, struct writequeue_entry,
				     list);
		if (e->dirty)
			free_entry(e);
	}
	spin_unlock(&con->writequeue_lock);

	con->rx_leftover = 0;
	con->retries = 0;
	clear_bit(CF_CONNECTED, &con->flags);
	clear_bit(CF_DELAY_CONNECT, &con->flags);
	clear_bit(CF_RECONNECT, &con->flags);
	clear_bit(CF_EOF, &con->flags);
	mutex_unlock(&con->sock_mutex);
	clear_bit(CF_CLOSING, &con->flags);
}

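/* Flush pending sends, half-close the socket and wait (with a timeout) for
 * the other end to acknowledge the shutdown; fall back to a forced close on
 * failure or timeout.
 */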
static void shutdown_connection(struct connection *con)
{
	int ret;

	flush_work(&con->swork);

	mutex_lock(&con->sock_mutex);
	/* nothing to shutdown */
	if (!con->sock) {
		mutex_unlock(&con->sock_mutex);
		return;
	}

	set_bit(CF_SHUTDOWN, &con->flags);
	ret = kernel_sock_shutdown(con->sock, SHUT_WR);
	mutex_unlock(&con->sock_mutex);
	if (ret) {
		log_print("Connection %p failed to shutdown: %d will force close",
			  con, ret);
		goto force_close;
	} else {
		ret = wait_event_timeout(con->shutdown_wait,
					 !test_bit(CF_SHUTDOWN, &con->flags),
					 DLM_SHUTDOWN_WAIT_TIMEOUT);
		if (ret == 0) {
			log_print("Connection %p shutdown timed out, will force close",
				  con);
			goto force_close;
		}
	}

	return;

force_close:
	clear_bit(CF_SHUTDOWN, &con->flags);
	close_connection(con, false, true, true);
}

static void dlm_tcp_shutdown(struct connection *con)
{
	if (con->othercon)
		shutdown_connection(con->othercon);
	shutdown_connection(con);
}

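/* Resize the receive buffer, preserving any leftover partial message */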
static int con_realloc_receive_buf(struct connection *con, int newlen)
{
	unsigned char *newbuf;

	newbuf = kmalloc(newlen, GFP_NOFS);
	if (!newbuf)
		return -ENOMEM;

	/* copy any leftover from the last receive */
	if (con->rx_leftover)
		memmove(newbuf, con->rx_buf, con->rx_leftover);

	/* swap to new buffer space */
	kfree(con->rx_buf);
	con->rx_buflen = newlen;
	con->rx_buf = newbuf;

	return 0;
}

/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
	int call_again_soon = 0;
	struct msghdr msg;
	struct kvec iov;
	int ret, buflen;

	mutex_lock(&con->sock_mutex);

	if (con->sock == NULL) {
		ret = -EAGAIN;
		goto out_close;
	}

	/* realloc if we got a new buffer size to read out */
	buflen = dlm_config.ci_buffer_size;
	if (con->rx_buflen != buflen && con->rx_leftover <= buflen) {
		ret = con_realloc_receive_buf(con, buflen);
		if (ret < 0)
			goto out_resched;
	}

	/* calculate the receive parameters from the last receive and
	 * any leftover bytes
	 */
	iov.iov_base = con->rx_buf + con->rx_leftover;
	iov.iov_len = con->rx_buflen - con->rx_leftover;

	memset(&msg, 0, sizeof(msg));
	msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
	ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
			     msg.msg_flags);
	if (ret <= 0)
		goto out_close;
	else if (ret == iov.iov_len)
		call_again_soon = 1;

	/* new buflen according to the bytes read plus the leftover from
	 * the last receive
	 */
	buflen = ret + con->rx_leftover;
	ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
	if (ret < 0)
		goto out_close;

	/* move any bytes left unprocessed to the beginning of the receive
	 * buffer, so that the next receive finds the full message at the
	 * start address of the buffer.
	 */
	con->rx_leftover = buflen - ret;
	if (con->rx_leftover) {
		memmove(con->rx_buf, con->rx_buf + ret,
			con->rx_leftover);
		call_again_soon = true;
	}

	if (call_again_soon)
		goto out_resched;

	mutex_unlock(&con->sock_mutex);
	return 0;

out_resched:
	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
		queue_work(recv_workqueue, &con->rwork);
	mutex_unlock(&con->sock_mutex);
	return -EAGAIN;

out_close:
	if (ret == 0) {
		log_print("connection %p got EOF from %d",
			  con, con->nodeid);

		if (con->eof_condition && con->eof_condition(con)) {
			set_bit(CF_EOF, &con->flags);
			mutex_unlock(&con->sock_mutex);
		} else {
			mutex_unlock(&con->sock_mutex);
			close_connection(con, false, true, false);

			/* handling for tcp shutdown */
			clear_bit(CF_SHUTDOWN, &con->flags);
			wake_up(&con->shutdown_wait);
		}

		/* signal the receive worker to break out */
		ret = -1;
	} else {
		mutex_unlock(&con->sock_mutex);
	}
	return ret;
}

/* Listening socket is busy, accept a connection */
static int accept_from_sock(struct listen_connection *con)
{
	int result;
	struct sockaddr_storage peeraddr;
	struct socket *newsock;
	int len, idx;
	int nodeid;
	struct connection *newcon;
	struct connection *addcon;
	unsigned int mark;

	if (!con->sock)
		return -ENOTCONN;

	result = kernel_accept(con->sock, &newsock, O_NONBLOCK);
	if (result < 0)
		goto accept_err;

	/* Get the connected socket's peer */
	memset(&peeraddr, 0, sizeof(peeraddr));
	len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
	if (len < 0) {
		result = -ECONNABORTED;
		goto accept_err;
	}

	/* Get the new node's NODEID */
	make_sockaddr(&peeraddr, 0, &len);
	if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) {
		unsigned char *b = (unsigned char *)&peeraddr;
		log_print("connect from non cluster node");
		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
				     b, sizeof(struct sockaddr_storage));
		sock_release(newsock);
		return -1;
	}

	log_print("got connection from %d", nodeid);

	/* Check to see if we already have a connection to this node. This
	 * could happen if the two nodes initiate a connection at roughly
	 * the same time and the connections cross on the wire.
	 * In this case we store the incoming one in "othercon"
	 */
	idx = srcu_read_lock(&connections_srcu);
	newcon = nodeid2con(nodeid, GFP_NOFS);
	if (!newcon) {
		srcu_read_unlock(&connections_srcu, idx);
		result = -ENOMEM;
		goto accept_err;
	}

	sock_set_mark(newsock->sk, mark);

	mutex_lock(&newcon->sock_mutex);
	if (newcon->sock) {
		struct connection *othercon = newcon->othercon;

		if (!othercon) {
			othercon = kzalloc(sizeof(*othercon), GFP_NOFS);
			if (!othercon) {
				log_print("failed to allocate incoming socket");
				mutex_unlock(&newcon->sock_mutex);
				srcu_read_unlock(&connections_srcu, idx);
				result = -ENOMEM;
				goto accept_err;
			}

			result = dlm_con_init(othercon, nodeid);
			if (result < 0) {
				kfree(othercon);
				mutex_unlock(&newcon->sock_mutex);
				srcu_read_unlock(&connections_srcu, idx);
				goto accept_err;
			}

			lockdep_set_subclass(&othercon->sock_mutex, 1);
			set_bit(CF_IS_OTHERCON, &othercon->flags);
			newcon->othercon = othercon;
			othercon->sendcon = newcon;
		} else {
			/* close other sock con if we have something new */
			close_connection(othercon, false, true, false);
		}

		mutex_lock(&othercon->sock_mutex);
		add_sock(newsock, othercon);
		addcon = othercon;
		mutex_unlock(&othercon->sock_mutex);
	}
	else {
		/* accept copies the sk after we've saved the callbacks, so we
		   don't want to save them a second time or comm errors will
		   result in calling sk_error_report recursively. */
		add_sock(newsock, newcon);
		addcon = newcon;
	}

	set_bit(CF_CONNECTED, &addcon->flags);
	mutex_unlock(&newcon->sock_mutex);

	/*
	 * Add it to the active queue in case we got data between
	 * processing the accept and adding the socket to the
	 * read_sockets list
	 */
	if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
		queue_work(recv_workqueue, &addcon->rwork);

	srcu_read_unlock(&connections_srcu, idx);

	return 0;

accept_err:
	if (newsock)
		sock_release(newsock);

	if (result != -EAGAIN)
		log_print("error accepting connection from node: %d", result);
	return result;
}

/*
 * writequeue_entry_complete - try to delete and free write queue entry
 * @e: write queue entry to try to delete
 * @completed: bytes completed
 *
 * writequeue_lock must be held.
 */
static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
{
	e->offset += completed;
	e->len -= completed;
	/* signal that the page was halfway transmitted */
	e->dirty = true;

	if (e->len == 0 && e->users == 0)
		free_entry(e);
}

/*
 * sctp_bind_addrs - bind a SCTP socket to all our addresses
 */
static int sctp_bind_addrs(struct socket *sock, uint16_t port)
{
	struct sockaddr_storage localaddr;
	struct sockaddr *addr = (struct sockaddr *)&localaddr;
	int i, addr_len, result = 0;

	for (i = 0; i < dlm_local_count; i++) {
		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
		make_sockaddr(&localaddr, port, &addr_len);

		if (!i)
			result = kernel_bind(sock, addr, addr_len);
		else
			result = sock_bind_add(sock->sk, addr, addr_len);

		if (result < 0) {
			log_print("Can't bind to %d addr number %d, %d.\n",
				  port, i + 1, result);
			break;
		}
	}
	return result;
}

/* Initiate an SCTP association.
   This is a special case of send_to_sock() in that we don't yet have a
   peeled-off socket for this association, so we use the listening socket
   and add the primary IP address of the remote node.
 */
static void sctp_connect_to_sock(struct connection *con)
{
	struct sockaddr_storage daddr;
	int result;
	int addr_len;
	struct socket *sock;
	unsigned int mark;

	mutex_lock(&con->sock_mutex);

	/* Some odd races can cause double-connects, ignore them */
	if (con->retries++ > MAX_CONNECT_RETRIES)
		goto out;

	if (con->sock) {
		log_print("node %d already connected.", con->nodeid);
		goto out;
	}

	memset(&daddr, 0, sizeof(daddr));
	result = nodeid_to_addr(con->nodeid, &daddr, NULL, true, &mark);
	if (result < 0) {
		log_print("no address for nodeid %d", con->nodeid);
		goto out;
	}

	/* Create a socket to communicate with */
	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
				  SOCK_STREAM, IPPROTO_SCTP, &sock);
	if (result < 0)
		goto socket_err;

	sock_set_mark(sock->sk, mark);

	add_sock(sock, con);

	/* Bind to all addresses. */
	if (sctp_bind_addrs(con->sock, 0))
		goto bind_err;

	make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len);

	log_print_ratelimited("connecting to %d", con->nodeid);

	/* Turn off Nagle's algorithm */
	sctp_sock_set_nodelay(sock->sk);

	/*
	 * Make sock->ops->connect() return within the specified time, since
	 * the O_NONBLOCK argument in connect() does not work here. Then we
	 * restore the default value of this attribute.
	 */
	sock_set_sndtimeo(sock->sk, 5);
	result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
				    0);
	sock_set_sndtimeo(sock->sk, 0);

	if (result == -EINPROGRESS)
		result = 0;
	if (result == 0) {
		if (!test_and_set_bit(CF_CONNECTED, &con->flags))
			log_print("successfully connected to node %d", con->nodeid);
		goto out;
	}

bind_err:
	con->sock = NULL;
	sock_release(sock);

socket_err:
	/*
	 * Some errors are fatal and this list might need adjusting. For other
	 * errors we try again until the max number of retries is reached.
	 */
	if (result != -EHOSTUNREACH &&
	    result != -ENETUNREACH &&
	    result != -ENETDOWN &&
	    result != -EINVAL &&
	    result != -EPROTONOSUPPORT) {
		log_print("connect %d try %d error %d", con->nodeid,
			  con->retries, result);
		mutex_unlock(&con->sock_mutex);
		msleep(1000);
		lowcomms_connect_sock(con);
		return;
	}

out:
	mutex_unlock(&con->sock_mutex);
}

/* Connect a new socket to its peer */
static void tcp_connect_to_sock(struct connection *con)
{
	struct sockaddr_storage saddr, src_addr;
	unsigned int mark;
	int addr_len;
	struct socket *sock = NULL;
	int result;

	mutex_lock(&con->sock_mutex);
	if (con->retries++ > MAX_CONNECT_RETRIES)
		goto out;

	/* Some odd races can cause double-connects, ignore them */
	if (con->sock)
		goto out;

	/* Create a socket to communicate with */
	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
				  SOCK_STREAM, IPPROTO_TCP, &sock);
	if (result < 0)
		goto out_err;

	memset(&saddr, 0, sizeof(saddr));
	result = nodeid_to_addr(con->nodeid, &saddr, NULL, false, &mark);
	if (result < 0) {
		log_print("no address for nodeid %d", con->nodeid);
		goto out_err;
	}

	sock_set_mark(sock->sk, mark);

	add_sock(sock, con);

Lon Hohberger6bd8fed2007-10-25 18:51:54 -04001267	/* Bind to our cluster-known address before connecting to avoid
1268	   routing problems */
1269 memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
1270 make_sockaddr(&src_addr, 0, &addr_len);
1271 result = sock->ops->bind(sock, (struct sockaddr *) &src_addr,
1272 addr_len);
1273 if (result < 0) {
1274 log_print("could not bind for connect: %d", result);
1275 /* This *may* not indicate a critical error */
1276 }
1277
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001278 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
1279
Alexander Aring2df6b762021-05-21 15:08:34 -04001280 log_print_ratelimited("connecting to %d", con->nodeid);
David Teiglandcb2d45d2010-11-12 11:12:55 -06001281
1282 /* Turn off Nagle's algorithm */
Christoph Hellwig12abc5e2020-05-28 07:12:19 +02001283 tcp_sock_set_nodelay(sock->sk);
David Teiglandcb2d45d2010-11-12 11:12:55 -06001284
David Teigland36b71a82012-07-26 12:44:30 -05001285 result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001286 O_NONBLOCK);
1287 if (result == -EINPROGRESS)
1288 result = 0;
1289 if (result == 0)
1290 goto out;
1291
1292out_err:
1293 if (con->sock) {
1294 sock_release(con->sock);
1295 con->sock = NULL;
Casey Dahlina89d63a2009-07-14 12:17:51 -05001296 } else if (sock) {
1297 sock_release(sock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001298 }
1299 /*
1300 * Some errors are fatal and this list might need adjusting. For other
1301 * errors we try again until the max number of retries is reached.
1302 */
David Teigland36b71a82012-07-26 12:44:30 -05001303 if (result != -EHOSTUNREACH &&
1304 result != -ENETUNREACH &&
1305 result != -ENETDOWN &&
1306 result != -EINVAL &&
1307 result != -EPROTONOSUPPORT) {
1308 log_print("connect %d try %d error %d", con->nodeid,
1309 con->retries, result);
1310 mutex_unlock(&con->sock_mutex);
1311 msleep(1000);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001312 lowcomms_connect_sock(con);
David Teigland36b71a82012-07-26 12:44:30 -05001313 return;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001314 }
1315out:
1316 mutex_unlock(&con->sock_mutex);
1317 return;
1318}
1319
Alexander Aringd11ccd42020-11-02 20:04:25 -05001320/* On error caller must run dlm_close_sock() for the
1321 * listen connection socket.
1322 */
1323static int tcp_create_listen_sock(struct listen_connection *con,
1324 struct sockaddr_storage *saddr)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001325{
1326 struct socket *sock = NULL;
1327 int result = 0;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001328 int addr_len;
1329
1330 if (dlm_local_addr[0]->ss_family == AF_INET)
1331 addr_len = sizeof(struct sockaddr_in);
1332 else
1333 addr_len = sizeof(struct sockaddr_in6);
1334
1335 /* Create a socket to communicate with */
Eric W. Biedermaneeb1bd52015-05-08 21:08:05 -05001336 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
1337 SOCK_STREAM, IPPROTO_TCP, &sock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001338 if (result < 0) {
David Teigland617e82e2007-04-26 13:46:49 -05001339 log_print("Can't create listening comms socket");
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001340 goto create_out;
1341 }
1342
Alexander Aringa5b7ab62020-06-26 13:26:49 -04001343 sock_set_mark(sock->sk, dlm_config.ci_mark);
1344
David Teiglandcb2d45d2010-11-12 11:12:55 -06001345 /* Turn off Nagle's algorithm */
Christoph Hellwig12abc5e2020-05-28 07:12:19 +02001346 tcp_sock_set_nodelay(sock->sk);
David Teiglandcb2d45d2010-11-12 11:12:55 -06001347
Christoph Hellwigb58f0e82020-05-28 07:12:09 +02001348 sock_set_reuseaddr(sock->sk);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001349
Alexander Aringd11ccd42020-11-02 20:04:25 -05001350 add_listen_sock(sock, con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001351
1352 /* Bind to our port */
1353 make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
1354 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
1355 if (result < 0) {
David Teigland617e82e2007-04-26 13:46:49 -05001356 log_print("Can't bind to port %d", dlm_config.ci_tcp_port);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001357 goto create_out;
1358 }
Christoph Hellwigce3d9542020-05-28 07:12:15 +02001359 sock_set_keepalive(sock->sk);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001360
1361 result = sock->ops->listen(sock, 5);
1362 if (result < 0) {
David Teigland617e82e2007-04-26 13:46:49 -05001363 log_print("Can't listen on port %d", dlm_config.ci_tcp_port);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001364 goto create_out;
1365 }
1366
Alexander Aringd11ccd42020-11-02 20:04:25 -05001367 return 0;
1368
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001369create_out:
Alexander Aringd11ccd42020-11-02 20:04:25 -05001370 return result;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001371}
1372
1373/* Get local addresses */
1374static void init_local(void)
1375{
1376 struct sockaddr_storage sas, *addr;
1377 int i;
1378
Patrick Caulfield30d3a232007-04-23 16:26:21 +01001379 dlm_local_count = 0;
David Teigland1b189b82012-03-21 09:18:34 -05001380 for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001381 if (dlm_our_addr(&sas, i))
1382 break;
1383
Amitoj Kaur Chawla5c93f562016-06-23 10:22:01 +05301384 addr = kmemdup(&sas, sizeof(*addr), GFP_NOFS);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001385 if (!addr)
1386 break;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001387 dlm_local_addr[dlm_local_count++] = addr;
1388 }
1389}
1390
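/* Free the local address copies taken by init_local() */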
Alexander Aring043697f2020-08-27 15:02:50 -04001391static void deinit_local(void)
1392{
1393 int i;
1394
1395 for (i = 0; i < dlm_local_count; i++)
1396 kfree(dlm_local_addr[i]);
1397}
1398
Alexander Aringd11ccd42020-11-02 20:04:25 -05001399/* Initialise SCTP socket and bind to all interfaces
1400 * On error caller must run dlm_close_sock() for the
1401 * listen connection socket.
1402 */
1403static int sctp_listen_for_all(struct listen_connection *con)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001404{
1405 struct socket *sock = NULL;
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001406 int result = -EINVAL;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001407
1408 log_print("Using SCTP for communications");
1409
Eric W. Biedermaneeb1bd52015-05-08 21:08:05 -05001410 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001411 SOCK_STREAM, IPPROTO_SCTP, &sock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001412 if (result < 0) {
1413 log_print("Can't create comms socket, check SCTP is loaded");
1414 goto out;
1415 }
1416
Christoph Hellwig26cfabf2020-05-28 07:12:16 +02001417 sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
Alexander Aringa5b7ab62020-06-26 13:26:49 -04001418 sock_set_mark(sock->sk, dlm_config.ci_mark);
Christoph Hellwig40ef92c2020-05-29 14:09:40 +02001419 sctp_sock_set_nodelay(sock->sk);
Mike Christie86e92ad2013-06-14 04:56:14 -05001420
Alexander Aringd11ccd42020-11-02 20:04:25 -05001421 add_listen_sock(sock, con);
Bob Petersonb81171c2016-02-05 14:39:02 -05001422
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001423 /* Bind to all addresses. */
Alexander Aringd11ccd42020-11-02 20:04:25 -05001424 result = sctp_bind_addrs(con->sock, dlm_config.ci_tcp_port);
1425 if (result < 0)
1426 goto out;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001427
1428 result = sock->ops->listen(sock, 5);
1429 if (result < 0) {
1430 log_print("Can't set socket listening");
Alexander Aringd11ccd42020-11-02 20:04:25 -05001431 goto out;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001432 }
1433
1434 return 0;
1435
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001436out:
1437 return result;
1438}
1439
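/* Set up the TCP listening socket; TCP is limited to a single
 * local address, so multi-homed configurations are rejected.
 */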
1440static int tcp_listen_for_all(void)
1441{
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001442 /* We don't support multi-homed hosts */
Alexander Aring1a26bfa2020-11-02 20:04:26 -05001443 if (dlm_local_count > 1) {
David Teigland617e82e2007-04-26 13:46:49 -05001444 log_print("TCP protocol can't handle multi-homed hosts, "
1445 "try SCTP");
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001446 return -EINVAL;
1447 }
1448
1449 log_print("Using TCP for communications");
1450
Alexander Aringd11ccd42020-11-02 20:04:25 -05001451 return tcp_create_listen_sock(&listen_con, dlm_local_addr[0]);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001452}
1453
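/* Allocate a writequeue entry backed by a fresh zeroed page for
 * staging outgoing data on @con.
 */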
1456static struct writequeue_entry *new_writequeue_entry(struct connection *con,
1457 gfp_t allocation)
1458{
1459 struct writequeue_entry *entry;
1460
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001461 entry = kzalloc(sizeof(*entry), allocation);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001462 if (!entry)
1463 return NULL;
1464
Alexander Aringe1a7cbc2021-03-01 17:05:15 -05001465 entry->page = alloc_page(allocation | __GFP_ZERO);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001466 if (!entry->page) {
1467 kfree(entry);
1468 return NULL;
1469 }
1470
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001471 entry->con = con;
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001472 entry->users = 1;
Alexander Aring8f2dc782021-05-21 15:08:42 -04001473 kref_init(&entry->ref);
1474 INIT_LIST_HEAD(&entry->msgs);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001475
1476 return entry;
1477}
1478
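/* Reserve @len bytes of writequeue space on @con: reuse the tail
 * entry if it has room, otherwise allocate a new one. *ppc is set
 * to the reserved area, and @cb, if given, runs under the
 * writequeue_lock.
 */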
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001479static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
Alexander Aring8f2dc782021-05-21 15:08:42 -04001480 gfp_t allocation, char **ppc,
1481 void (*cb)(struct dlm_mhandle *mh),
1482 struct dlm_mhandle *mh)
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001483{
1484 struct writequeue_entry *e;
1485
1486 spin_lock(&con->writequeue_lock);
1487 if (!list_empty(&con->writequeue)) {
1488 e = list_last_entry(&con->writequeue, struct writequeue_entry, list);
1489 if (DLM_WQ_REMAIN_BYTES(e) >= len) {
Alexander Aring8f2dc782021-05-21 15:08:42 -04001490 kref_get(&e->ref);
1491
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001492 *ppc = page_address(e->page) + e->end;
Alexander Aring8f2dc782021-05-21 15:08:42 -04001493 if (cb)
1494 cb(mh);
1495
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001496 e->end += len;
1497 e->users++;
1498 spin_unlock(&con->writequeue_lock);
1499
1500 return e;
1501 }
1502 }
1503 spin_unlock(&con->writequeue_lock);
1504
1505 e = new_writequeue_entry(con, allocation);
1506 if (!e)
1507 return NULL;
1508
Alexander Aring8f2dc782021-05-21 15:08:42 -04001509 kref_get(&e->ref);
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001510 *ppc = page_address(e->page);
1511 e->end += len;
Alexander Aring8aa31cb2021-05-21 15:08:39 -04001512 atomic_inc(&con->writequeue_cnt);
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001513
1514 spin_lock(&con->writequeue_lock);
Alexander Aring8f2dc782021-05-21 15:08:42 -04001515 if (cb)
1516 cb(mh);
1517
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001518 list_add_tail(&e->list, &con->writequeue);
1519 spin_unlock(&con->writequeue_lock);
1520
1521 return e;
1522};
1523
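/* Allocate a dlm_msg whose payload is staged in @con's writequeue */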
Alexander Aring2874d1a2021-05-21 15:08:43 -04001524static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
1525 gfp_t allocation, char **ppc,
1526 void (*cb)(struct dlm_mhandle *mh),
1527 struct dlm_mhandle *mh)
1528{
1529 struct writequeue_entry *e;
1530 struct dlm_msg *msg;
1531
1532 msg = kzalloc(sizeof(*msg), allocation);
1533 if (!msg)
1534 return NULL;
1535
1536 kref_init(&msg->ref);
1537
1538 e = new_wq_entry(con, len, allocation, ppc, cb, mh);
1539 if (!e) {
1540 kfree(msg);
1541 return NULL;
1542 }
1543
1544 msg->ppc = *ppc;
1545 msg->len = len;
1546 msg->entry = e;
1547
1548 return msg;
1549}
1550
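/* As dlm_lowcomms_new_msg_con(), but looks the connection up by
 * nodeid and keeps connections_srcu held until the message is
 * committed.
 */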
Alexander Aring8f2dc782021-05-21 15:08:42 -04001551struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
1552 char **ppc, void (*cb)(struct dlm_mhandle *mh),
1553 struct dlm_mhandle *mh)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001554{
1555 struct connection *con;
Alexander Aring8f2dc782021-05-21 15:08:42 -04001556 struct dlm_msg *msg;
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001557 int idx;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001558
Alexander Aringd10a0b82021-06-02 09:45:20 -04001559 if (len > DLM_MAX_SOCKET_BUFSIZE ||
Alexander Aringc45674f2021-03-01 17:05:14 -05001560 len < sizeof(struct dlm_header)) {
Alexander Aringd10a0b82021-06-02 09:45:20 -04001561 BUILD_BUG_ON(PAGE_SIZE < DLM_MAX_SOCKET_BUFSIZE);
Alexander Aring692f51c2020-11-02 20:04:18 -05001562		log_print("invalid buffer length %d", len);
Alexander Aringc45674f2021-03-01 17:05:14 -05001563 WARN_ON(1);
Alexander Aring692f51c2020-11-02 20:04:18 -05001564 return NULL;
1565 }
1566
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001567 idx = srcu_read_lock(&connections_srcu);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001568 con = nodeid2con(nodeid, allocation);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001569 if (!con) {
1570 srcu_read_unlock(&connections_srcu, idx);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001571 return NULL;
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001572 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001573
Alexander Aring2874d1a2021-05-21 15:08:43 -04001574 msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, mh);
Alexander Aring8f2dc782021-05-21 15:08:42 -04001575 if (!msg) {
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001576 srcu_read_unlock(&connections_srcu, idx);
1577 return NULL;
1578 }
1579
Alexander Aring8f2dc782021-05-21 15:08:42 -04001580	/* we assume if successful, commit must be called */
1581 msg->idx = idx;
Alexander Aring8f2dc782021-05-21 15:08:42 -04001582 return msg;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001583}
1584
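/* Attach @msg to its writequeue entry; once the entry has no other
 * pending users, hand it to the send workqueue.
 */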
Alexander Aring2874d1a2021-05-21 15:08:43 -04001585static void _dlm_lowcomms_commit_msg(struct dlm_msg *msg)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001586{
Alexander Aring8f2dc782021-05-21 15:08:42 -04001587 struct writequeue_entry *e = msg->entry;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001588 struct connection *con = e->con;
1589 int users;
1590
1591 spin_lock(&con->writequeue_lock);
Alexander Aring8f2dc782021-05-21 15:08:42 -04001592 kref_get(&msg->ref);
1593 list_add(&msg->list, &e->msgs);
1594
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001595 users = --e->users;
1596 if (users)
1597 goto out;
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001598
1599 e->len = DLM_WQ_LENGTH_BYTES(e);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001600 spin_unlock(&con->writequeue_lock);
1601
Bob Peterson01da24d2017-09-12 08:55:14 +00001602 queue_work(send_workqueue, &con->swork);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001603 return;
1604
1605out:
1606 spin_unlock(&con->writequeue_lock);
1607 return;
1608}
1609
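/* Commit a message from dlm_lowcomms_new_msg() and drop the SRCU
 * read lock taken at allocation time.
 */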
Alexander Aring2874d1a2021-05-21 15:08:43 -04001610void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
1611{
1612 _dlm_lowcomms_commit_msg(msg);
1613 srcu_read_unlock(&connections_srcu, msg->idx);
1614}
1615
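/* Drop a reference on @msg; the last put frees it */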
Alexander Aring8f2dc782021-05-21 15:08:42 -04001616void dlm_lowcomms_put_msg(struct dlm_msg *msg)
1617{
1618 kref_put(&msg->ref, dlm_msg_release);
1619}
1620
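/*
 * Minimal caller sketch (hypothetical, not lifted from the midcomms
 * layer): allocate writequeue space, fill it, then commit and drop
 * the caller's reference.
 *
 *	char *ppc;
 *	struct dlm_msg *msg;
 *
 *	msg = dlm_lowcomms_new_msg(nodeid, len, GFP_NOFS, &ppc,
 *				   NULL, NULL);
 *	if (!msg)
 *		return -ENOMEM;
 *
 *	memcpy(ppc, buf, len);
 *	dlm_lowcomms_commit_msg(msg);
 *	dlm_lowcomms_put_msg(msg);
 */
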
Alexander Aring2874d1a2021-05-21 15:08:43 -04001621/* does not hold connections_srcu, for workqueue usage only */
1622int dlm_lowcomms_resend_msg(struct dlm_msg *msg)
1623{
1624 struct dlm_msg *msg_resend;
1625 char *ppc;
1626
1627 if (msg->retransmit)
1628 return 1;
1629
1630 msg_resend = dlm_lowcomms_new_msg_con(msg->entry->con, msg->len,
1631 GFP_ATOMIC, &ppc, NULL, NULL);
1632 if (!msg_resend)
1633 return -ENOMEM;
1634
1635 msg->retransmit = true;
1636 kref_get(&msg->ref);
1637 msg_resend->orig_msg = msg;
1638
1639 memcpy(ppc, msg->ppc, msg->len);
1640 _dlm_lowcomms_commit_msg(msg_resend);
1641 dlm_lowcomms_put_msg(msg_resend);
1642
1643 return 0;
1644}
1645
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001646/* Send a message */
1647static void send_to_sock(struct connection *con)
1648{
1649 int ret = 0;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001650 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
1651 struct writequeue_entry *e;
1652 int len, offset;
Bob Petersonf92c8dd2010-11-12 11:15:20 -06001653 int count = 0;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001654
1655 mutex_lock(&con->sock_mutex);
1656 if (con->sock == NULL)
1657 goto out_connect;
1658
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001659 spin_lock(&con->writequeue_lock);
1660 for (;;) {
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001661 if (list_empty(&con->writequeue))
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001662 break;
1663
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001664 e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001665 len = e->len;
1666 offset = e->offset;
1667 BUG_ON(len == 0 && e->users == 0);
1668 spin_unlock(&con->writequeue_lock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001669
1670 ret = 0;
1671 if (len) {
Paolo Bonzini1329e3f2009-08-24 13:18:04 -05001672 ret = kernel_sendpage(con->sock, e->page, offset, len,
1673 msg_flags);
Patrick Caulfieldd66f8272007-09-14 08:49:21 +01001674 if (ret == -EAGAIN || ret == 0) {
David Millerb36930d2010-11-10 21:56:39 -08001675 if (ret == -EAGAIN &&
Eric Dumazet9cd3e072015-11-29 20:03:10 -08001676 test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
David Millerb36930d2010-11-10 21:56:39 -08001677 !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
1678 /* Notify TCP that we're limited by the
1679 * application window size.
1680 */
1681 set_bit(SOCK_NOSPACE, &con->sock->flags);
1682 con->sock->sk->sk_write_pending++;
1683 }
Patrick Caulfieldd66f8272007-09-14 08:49:21 +01001684 cond_resched();
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001685 goto out;
Ying Xue9c5bef52012-08-13 14:29:55 +08001686 } else if (ret < 0)
Alexander Aringba868d92021-05-21 15:08:37 -04001687 goto out;
Patrick Caulfieldd66f8272007-09-14 08:49:21 +01001688 }
Bob Petersonf92c8dd2010-11-12 11:15:20 -06001689
1690 /* Don't starve people filling buffers */
1691 if (++count >= MAX_SEND_MSG_COUNT) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001692 cond_resched();
Bob Petersonf92c8dd2010-11-12 11:15:20 -06001693 count = 0;
1694 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001695
1696 spin_lock(&con->writequeue_lock);
Mike Christie5d689872013-06-14 04:56:13 -05001697 writequeue_entry_complete(e, ret);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001698 }
1699 spin_unlock(&con->writequeue_lock);
Alexander Aring8aa31cb2021-05-21 15:08:39 -04001700
1701 /* close if we got EOF */
1702 if (test_and_clear_bit(CF_EOF, &con->flags)) {
1703 mutex_unlock(&con->sock_mutex);
1704 close_connection(con, false, false, true);
1705
1706 /* handling for tcp shutdown */
1707 clear_bit(CF_SHUTDOWN, &con->flags);
1708 wake_up(&con->shutdown_wait);
1709 } else {
1710 mutex_unlock(&con->sock_mutex);
1711 }
1712
1713 return;
1714
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001715out:
1716 mutex_unlock(&con->sock_mutex);
1717 return;
1718
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001719out_connect:
1720 mutex_unlock(&con->sock_mutex);
Bob Peterson01da24d2017-09-12 08:55:14 +00001721 queue_work(send_workqueue, &con->swork);
1722 cond_resched();
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001723}
1724
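/* Free every entry still queued on @con's writequeue */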
1725static void clean_one_writequeue(struct connection *con)
1726{
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001727 struct writequeue_entry *e, *safe;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001728
1729 spin_lock(&con->writequeue_lock);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001730 list_for_each_entry_safe(e, safe, &con->writequeue, list) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001731 free_entry(e);
1732 }
1733 spin_unlock(&con->writequeue_lock);
1734}
1735
1736/* Called from recovery when it knows that a node has
1737 left the cluster */
1738int dlm_lowcomms_close(int nodeid)
1739{
1740 struct connection *con;
David Teigland36b71a82012-07-26 12:44:30 -05001741 struct dlm_node_addr *na;
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001742 int idx;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001743
1744 log_print("closing connection to node %d", nodeid);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001745 idx = srcu_read_lock(&connections_srcu);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001746 con = nodeid2con(nodeid, 0);
1747 if (con) {
Lars Marowsky-Bree063c4c92009-08-11 16:18:23 -05001748 set_bit(CF_CLOSE, &con->flags);
Marcelo Ricardo Leitner0d737a82015-08-11 19:22:21 -03001749 close_connection(con, true, true, true);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001750 clean_one_writequeue(con);
Alexander Aring53a5eda2020-11-02 20:04:19 -05001751 if (con->othercon)
1752 clean_one_writequeue(con->othercon);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001753 }
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001754 srcu_read_unlock(&connections_srcu, idx);
David Teigland36b71a82012-07-26 12:44:30 -05001755
1756 spin_lock(&dlm_node_addrs_spin);
1757 na = find_node_addr(nodeid);
1758 if (na) {
1759 list_del(&na->list);
1760 while (na->addr_count--)
1761 kfree(na->addr[na->addr_count]);
1762 kfree(na);
1763 }
1764 spin_unlock(&dlm_node_addrs_spin);
1765
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001766 return 0;
1767}
1768
1769/* Receive workqueue function */
1770static void process_recv_sockets(struct work_struct *work)
1771{
1772 struct connection *con = container_of(work, struct connection, rwork);
1773 int err;
1774
1775 clear_bit(CF_READ_PENDING, &con->flags);
1776 do {
Alexander Aringd11ccd42020-11-02 20:04:25 -05001777 err = receive_from_sock(con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001778 } while (!err);
1779}
1780
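/* Listen workqueue function: accept an incoming connection */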
Alexander Aringd11ccd42020-11-02 20:04:25 -05001781static void process_listen_recv_socket(struct work_struct *work)
1782{
1783 accept_from_sock(&listen_con);
1784}
1785
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001786/* Send workqueue function */
1787static void process_send_sockets(struct work_struct *work)
1788{
1789 struct connection *con = container_of(work, struct connection, swork);
1790
Alexander Aring7443bc92021-05-21 15:08:36 -04001791 WARN_ON(test_bit(CF_IS_OTHERCON, &con->flags));
1792
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001793 clear_bit(CF_WRITE_PENDING, &con->flags);
Alexander Aringba868d92021-05-21 15:08:37 -04001794
Alexander Aring489d8e52021-05-21 15:08:46 -04001795 if (test_and_clear_bit(CF_RECONNECT, &con->flags)) {
Alexander Aringba868d92021-05-21 15:08:37 -04001796 close_connection(con, false, false, true);
Alexander Aring489d8e52021-05-21 15:08:46 -04001797 dlm_midcomms_unack_msg_resend(con->nodeid);
1798 }
Alexander Aringba868d92021-05-21 15:08:37 -04001799
1800 if (con->sock == NULL) { /* not mutex protected so check it inside too */
1801 if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
1802 msleep(1000);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001803 con->connect_action(con);
Alexander Aringba868d92021-05-21 15:08:37 -04001804 }
Bob Peterson01da24d2017-09-12 08:55:14 +00001805 if (!list_empty(&con->writequeue))
Lars Marowsky-Bree063c4c92009-08-11 16:18:23 -05001806 send_to_sock(con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001807}
1808
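/* Destroy the dlm_recv and dlm_send workqueues */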
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001809static void work_stop(void)
1810{
Alexander Aringfcef0e62021-06-02 09:45:15 -04001811 if (recv_workqueue) {
David Windsorb3555162019-04-02 08:37:10 -04001812 destroy_workqueue(recv_workqueue);
Alexander Aringfcef0e62021-06-02 09:45:15 -04001813 recv_workqueue = NULL;
1814 }
1815
1816 if (send_workqueue) {
David Windsorb3555162019-04-02 08:37:10 -04001817 destroy_workqueue(send_workqueue);
Alexander Aringfcef0e62021-06-02 09:45:15 -04001818 send_workqueue = NULL;
1819 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001820}
1821
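/* Create the ordered dlm_recv and dlm_send workqueues */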
1822static int work_start(void)
1823{
Alexander Aring6c6a1cc2021-06-02 09:45:17 -04001824 recv_workqueue = alloc_ordered_workqueue("dlm_recv", WQ_MEM_RECLAIM);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001825 if (!recv_workqueue) {
1826 log_print("can't start dlm_recv");
1827 return -ENOMEM;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001828 }
1829
Alexander Aring6c6a1cc2021-06-02 09:45:17 -04001830 send_workqueue = alloc_ordered_workqueue("dlm_send", WQ_MEM_RECLAIM);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001831 if (!send_workqueue) {
1832 log_print("can't start dlm_send");
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001833 destroy_workqueue(recv_workqueue);
Alexander Aringfcef0e62021-06-02 09:45:15 -04001834 recv_workqueue = NULL;
Namhyung Kimb9d41052010-12-13 13:42:24 -06001835 return -ENOMEM;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001836 }
1837
1838 return 0;
1839}
1840
Alexander Aring9d232462021-03-01 17:05:20 -05001841static void shutdown_conn(struct connection *con)
1842{
1843 if (con->shutdown_action)
1844 con->shutdown_action(con);
1845}
1846
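/* Flush pending work, close the listen socket and shut down all
 * node connections.
 */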
1847void dlm_lowcomms_shutdown(void)
1848{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001849 int idx;
1850
Alexander Aring9d232462021-03-01 17:05:20 -05001851 /* Set all the flags to prevent any
1852 * socket activity.
1853 */
1854 dlm_allow_conn = 0;
1855
1856 if (recv_workqueue)
1857 flush_workqueue(recv_workqueue);
1858 if (send_workqueue)
1859 flush_workqueue(send_workqueue);
1860
1861 dlm_close_sock(&listen_con.sock);
1862
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001863 idx = srcu_read_lock(&connections_srcu);
Alexander Aring9d232462021-03-01 17:05:20 -05001864 foreach_conn(shutdown_conn);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001865 srcu_read_unlock(&connections_srcu, idx);
Alexander Aring9d232462021-03-01 17:05:20 -05001866}
1867
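/* Quiesce a connection (and optionally its othercon) without
 * freeing it.
 */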
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001868static void _stop_conn(struct connection *con, bool and_other)
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001869{
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001870 mutex_lock(&con->sock_mutex);
tsutomu.owa@toshiba.co.jp173a31f2017-09-12 09:01:24 +00001871 set_bit(CF_CLOSE, &con->flags);
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001872 set_bit(CF_READ_PENDING, &con->flags);
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001873 set_bit(CF_WRITE_PENDING, &con->flags);
tsutomu.owa@toshiba.co.jp93eaade2017-09-12 09:01:55 +00001874 if (con->sock && con->sock->sk) {
1875 write_lock_bh(&con->sock->sk->sk_callback_lock);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001876 con->sock->sk->sk_user_data = NULL;
tsutomu.owa@toshiba.co.jp93eaade2017-09-12 09:01:55 +00001877 write_unlock_bh(&con->sock->sk->sk_callback_lock);
1878 }
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001879 if (con->othercon && and_other)
1880 _stop_conn(con->othercon, false);
1881 mutex_unlock(&con->sock_mutex);
1882}
1883
1884static void stop_conn(struct connection *con)
1885{
1886 _stop_conn(con, true);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001887}
1888
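/* RCU callback: free the connection's receive buffer and the
 * connection itself.
 */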
Alexander Aring4798cbb2020-09-24 10:31:26 -04001889static void connection_release(struct rcu_head *rcu)
1890{
1891 struct connection *con = container_of(rcu, struct connection, rcu);
1892
1893 kfree(con->rx_buf);
1894 kfree(con);
1895}
1896
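/* Close @con, unhash it and free it (and any othercon) after an
 * SRCU grace period.
 */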
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001897static void free_conn(struct connection *con)
1898{
Marcelo Ricardo Leitner0d737a82015-08-11 19:22:21 -03001899 close_connection(con, true, true, true);
Alexander Aringa47666eb2020-08-27 15:02:49 -04001900 spin_lock(&connections_lock);
1901 hlist_del_rcu(&con->list);
1902 spin_unlock(&connections_lock);
Alexander Aring948c47e2020-08-27 15:02:53 -04001903 if (con->othercon) {
1904 clean_one_writequeue(con->othercon);
Alexander Aring5cbec202020-11-02 20:04:16 -05001905 call_srcu(&connections_srcu, &con->othercon->rcu,
1906 connection_release);
Alexander Aring948c47e2020-08-27 15:02:53 -04001907 }
Alexander Aring0de98432020-08-27 15:02:52 -04001908 clean_one_writequeue(con);
Alexander Aring5cbec202020-11-02 20:04:16 -05001909 call_srcu(&connections_srcu, &con->rcu, connection_release);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001910}
1911
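/* Stop all connections and flush the workqueues repeatedly until
 * no further read or write work is being processed on any
 * connection.
 */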
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001912static void work_flush(void)
1913{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001914 int ok;
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001915 int i;
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001916 struct connection *con;
1917
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001918 do {
1919 ok = 1;
1920 foreach_conn(stop_conn);
David Windsorb3555162019-04-02 08:37:10 -04001921 if (recv_workqueue)
1922 flush_workqueue(recv_workqueue);
1923 if (send_workqueue)
1924 flush_workqueue(send_workqueue);
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001925 for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
Alexander Aringa47666eb2020-08-27 15:02:49 -04001926 hlist_for_each_entry_rcu(con, &connection_hash[i],
1927 list) {
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001928 ok &= test_bit(CF_READ_PENDING, &con->flags);
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001929 ok &= test_bit(CF_WRITE_PENDING, &con->flags);
1930 if (con->othercon) {
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001931 ok &= test_bit(CF_READ_PENDING,
1932 &con->othercon->flags);
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001933 ok &= test_bit(CF_WRITE_PENDING,
1934 &con->othercon->flags);
1935 }
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001936 }
1937 }
1938 } while (!ok);
1939}
1940
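/* Full teardown: quiesce and free all connections, stop the
 * workqueues and release the local addresses.
 */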
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001941void dlm_lowcomms_stop(void)
1942{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001943 int idx;
1944
1945 idx = srcu_read_lock(&connections_srcu);
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001946 work_flush();
Marcelo Ricardo Leitner3a8db792016-10-08 10:14:37 -03001947 foreach_conn(free_conn);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001948 srcu_read_unlock(&connections_srcu, idx);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001949 work_stop();
Alexander Aring043697f2020-08-27 15:02:50 -04001950 deinit_local();
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001951}
1952
1953int dlm_lowcomms_start(void)
1954{
1955 int error = -EINVAL;
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001956 int i;
1957
1958 for (i = 0; i < CONN_HASH_SIZE; i++)
1959 INIT_HLIST_HEAD(&connection_hash[i]);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001960
1961 init_local();
1962 if (!dlm_local_count) {
David Teigland617e82e2007-04-26 13:46:49 -05001963 error = -ENOTCONN;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001964 log_print("no local IP address has been set");
David Teigland513ef592012-03-30 11:46:08 -05001965 goto fail;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001966 }
1967
Alexander Aringd11ccd42020-11-02 20:04:25 -05001968 INIT_WORK(&listen_con.rwork, process_listen_recv_socket);
1969
David Teigland513ef592012-03-30 11:46:08 -05001970 error = work_start();
1971 if (error)
Alexander Aringfcef0e62021-06-02 09:45:15 -04001972 goto fail_local;
David Teigland513ef592012-03-30 11:46:08 -05001973
1974 dlm_allow_conn = 1;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001975
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001976 /* Start listening */
Alexander Aringac7d5d02021-06-02 09:45:19 -04001977 switch (dlm_config.ci_protocol) {
1978 case DLM_PROTO_TCP:
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001979 error = tcp_listen_for_all();
Alexander Aringac7d5d02021-06-02 09:45:19 -04001980 break;
1981 case DLM_PROTO_SCTP:
Alexander Aringd11ccd42020-11-02 20:04:25 -05001982 error = sctp_listen_for_all(&listen_con);
Alexander Aringac7d5d02021-06-02 09:45:19 -04001983 break;
1984 default:
1985 log_print("Invalid protocol identifier %d set",
1986 dlm_config.ci_protocol);
1987 error = -EINVAL;
1988 break;
1989 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001990 if (error)
1991 goto fail_unlisten;
1992
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001993 return 0;
1994
1995fail_unlisten:
David Teigland513ef592012-03-30 11:46:08 -05001996 dlm_allow_conn = 0;
Alexander Aringd11ccd42020-11-02 20:04:25 -05001997 dlm_close_sock(&listen_con.sock);
Alexander Aringfcef0e62021-06-02 09:45:15 -04001998 work_stop();
1999fail_local:
2000 deinit_local();
David Teigland513ef592012-03-30 11:46:08 -05002001fail:
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01002002 return error;
2003}
David Teigland36b71a82012-07-26 12:44:30 -05002004
2005void dlm_lowcomms_exit(void)
2006{
2007 struct dlm_node_addr *na, *safe;
2008
2009 spin_lock(&dlm_node_addrs_spin);
2010 list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) {
2011 list_del(&na->list);
2012 while (na->addr_count--)
2013 kfree(na->addr[na->addr_count]);
2014 kfree(na);
2015 }
2016 spin_unlock(&dlm_node_addrs_spin);
2017}