// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
**
**
*******************************************************************************
******************************************************************************/

/*
 * lowcomms.c
 *
 * This is the "low-level" comms layer.
 *
 * It is responsible for sending/receiving messages
 * from other nodes in the cluster.
 *
 * Cluster nodes are referred to by their nodeids. nodeids are
 * simply 32 bit numbers to the locking module - if they need to
 * be expanded for the cluster infrastructure then that is its
 * responsibility. It is this layer's
 * responsibility to resolve these into IP addresses or
 * whatever it needs for inter-node communication.
 *
 * The comms level is two kernel threads that deal mainly with
 * the receiving of messages from other nodes and passing them
 * up to the mid-level comms layer (which understands the
 * message format) for execution by the locking core, and
 * a send thread which does all the setting up of connections
 * to remote nodes and the sending of data. Threads are not allowed
 * to send their own data because it may cause them to wait in times
 * of high load. Also, this way, the sending thread can collect together
 * messages bound for one node and send them in one block.
 *
 * lowcomms will choose to use either TCP or SCTP as its transport layer
 * depending on the configuration variable 'protocol'. This should be set
 * to 0 (default) for TCP or 1 for SCTP. It should be configured using a
 * cluster-wide mechanism as it must be the same on all nodes of the cluster
 * for the DLM to function.
 *
 */

#include <asm/ioctls.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/sctp.h>
#include <linux/slab.h>
#include <net/sctp/sctp.h>
#include <net/ipv6.h>

#include "dlm_internal.h"
#include "lowcomms.h"
#include "midcomms.h"
#include "config.h"

#define NEEDED_RMEM (4*1024*1024)
#define CONN_HASH_SIZE 32

/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000)

struct connection {
	struct socket *sock;	/* NULL if not connected */
	uint32_t nodeid;	/* So we know who we are in the list */
	struct mutex sock_mutex;
	unsigned long flags;
#define CF_READ_PENDING 1
#define CF_WRITE_PENDING 2
#define CF_INIT_PENDING 4
#define CF_IS_OTHERCON 5
#define CF_CLOSE 6
#define CF_APP_LIMITED 7
#define CF_CLOSING 8
#define CF_SHUTDOWN 9
#define CF_CONNECTED 10
#define CF_RECONNECT 11
#define CF_DELAY_CONNECT 12
#define CF_EOF 13
	struct list_head writequeue;  /* List of outgoing writequeue_entries */
	spinlock_t writequeue_lock;
	atomic_t writequeue_cnt;
	void (*connect_action) (struct connection *);	/* What to do to connect */
	void (*shutdown_action)(struct connection *con); /* What to do to shutdown */
	bool (*eof_condition)(struct connection *con); /* How to check for an EOF condition */
	int retries;
#define MAX_CONNECT_RETRIES 3
	struct hlist_node list;
	struct connection *othercon;
	struct connection *sendcon;
	struct work_struct rwork; /* Receive workqueue */
	struct work_struct swork; /* Send workqueue */
	wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
	unsigned char *rx_buf;
	int rx_buflen;
	int rx_leftover;
	struct rcu_head rcu;
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)

struct listen_connection {
	struct socket *sock;
	struct work_struct rwork;
};

#define DLM_WQ_REMAIN_BYTES(e) (PAGE_SIZE - e->end)
#define DLM_WQ_LENGTH_BYTES(e) (e->end - e->offset)

/* An entry waiting to be sent */
struct writequeue_entry {
	struct list_head list;
	struct page *page;
	int offset;
	int len;
	int end;
	int users;
	struct connection *con;
	struct list_head msgs;
	struct kref ref;
};

struct dlm_msg {
	struct writequeue_entry *entry;
	void *ppc;
	int len;
	int idx; /* new()/commit() idx exchange */

	struct list_head list;
	struct kref ref;
};

struct dlm_node_addr {
	struct list_head list;
	int nodeid;
	int mark;
	int addr_count;
	int curr_addr_index;
	struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
};

static struct listen_sock_callbacks {
	void (*sk_error_report)(struct sock *);
	void (*sk_data_ready)(struct sock *);
	void (*sk_state_change)(struct sock *);
	void (*sk_write_space)(struct sock *);
} listen_sock;

static LIST_HEAD(dlm_node_addrs);
static DEFINE_SPINLOCK(dlm_node_addrs_spin);

static struct listen_connection listen_con;
static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
static int dlm_local_count;
int dlm_allow_conn;

/* Work queues */
static struct workqueue_struct *recv_workqueue;
static struct workqueue_struct *send_workqueue;

static struct hlist_head connection_hash[CONN_HASH_SIZE];
static DEFINE_SPINLOCK(connections_lock);
DEFINE_STATIC_SRCU(connections_srcu);

static void process_recv_sockets(struct work_struct *work);
static void process_send_sockets(struct work_struct *work);

static void sctp_connect_to_sock(struct connection *con);
static void tcp_connect_to_sock(struct connection *con);
static void dlm_tcp_shutdown(struct connection *con);

/* This is deliberately very simple because most clusters have simple
   sequential nodeids, so we should be able to go straight to a connection
   struct in the array */
static inline int nodeid_hash(int nodeid)
{
	return nodeid & (CONN_HASH_SIZE-1);
}

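/* Look up the connection for @nodeid in hash bucket @r. Callers are
 * expected to hold an SRCU read lock on connections_srcu (or
 * connections_lock) so the RCU-protected hash chain cannot change
 * underneath them.
 */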
static struct connection *__find_con(int nodeid, int r)
{
	struct connection *con;

	hlist_for_each_entry_rcu(con, &connection_hash[r], list) {
		if (con->nodeid == nodeid)
			return con;
	}

	return NULL;
}

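/* TCP only: returns true while messages are still queued for sending, in
 * which case receive_from_sock() defers the teardown by setting CF_EOF
 * instead of closing the connection immediately on EOF.
 */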
static bool tcp_eof_condition(struct connection *con)
{
	return atomic_read(&con->writequeue_cnt);
}

static int dlm_con_init(struct connection *con, int nodeid)
{
	con->rx_buflen = dlm_config.ci_buffer_size;
	con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS);
	if (!con->rx_buf)
		return -ENOMEM;

	con->nodeid = nodeid;
	mutex_init(&con->sock_mutex);
	INIT_LIST_HEAD(&con->writequeue);
	spin_lock_init(&con->writequeue_lock);
	atomic_set(&con->writequeue_cnt, 0);
	INIT_WORK(&con->swork, process_send_sockets);
	INIT_WORK(&con->rwork, process_recv_sockets);
	init_waitqueue_head(&con->shutdown_wait);

	if (dlm_config.ci_protocol == 0) {
		con->connect_action = tcp_connect_to_sock;
		con->shutdown_action = dlm_tcp_shutdown;
		con->eof_condition = tcp_eof_condition;
	} else {
		con->connect_action = sctp_connect_to_sock;
	}

	return 0;
}

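/* Lookups are protected by connections_srcu; a minimal sketch of the
 * pattern used by callers such as dlm_lowcomms_connect_node():
 *
 *	idx = srcu_read_lock(&connections_srcu);
 *	con = nodeid2con(nodeid, GFP_NOFS);
 *	if (con)
 *		... use con ...
 *	srcu_read_unlock(&connections_srcu, idx);
 */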
/*
 * If 'allocation' is zero then we don't attempt to create a new
 * connection structure for this node.
 */
static struct connection *nodeid2con(int nodeid, gfp_t alloc)
{
	struct connection *con, *tmp;
	int r, ret;

	r = nodeid_hash(nodeid);
	con = __find_con(nodeid, r);
	if (con || !alloc)
		return con;

	con = kzalloc(sizeof(*con), alloc);
	if (!con)
		return NULL;

	ret = dlm_con_init(con, nodeid);
	if (ret) {
		kfree(con);
		return NULL;
	}

	spin_lock(&connections_lock);
	/* Because multiple workqueues/threads call this function it can
	 * race on multiple cpu's. Instead of locking the hot path in
	 * __find_con() we just check again, under protection of
	 * connections_lock, for the rare case of a recently added node.
	 * If that is the case we abort our connection creation and
	 * return the existing connection.
	 */
	tmp = __find_con(nodeid, r);
	if (tmp) {
		spin_unlock(&connections_lock);
		kfree(con->rx_buf);
		kfree(con);
		return tmp;
	}

	hlist_add_head_rcu(&con->list, &connection_hash[r]);
	spin_unlock(&connections_lock);

	return con;
}

/* Loop round all connections */
static void foreach_conn(void (*conn_func)(struct connection *c))
{
	int i;
	struct connection *con;

	for (i = 0; i < CONN_HASH_SIZE; i++) {
		hlist_for_each_entry_rcu(con, &connection_hash[i], list)
			conn_func(con);
	}
}

static struct dlm_node_addr *find_node_addr(int nodeid)
{
	struct dlm_node_addr *na;

	list_for_each_entry(na, &dlm_node_addrs, list) {
		if (na->nodeid == nodeid)
			return na;
	}
	return NULL;
}

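/* Two addresses match only if the family, the address itself and the port
 * are all equal; anything else, including an unknown family, compares as
 * unequal.
 */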
static int addr_compare(const struct sockaddr_storage *x,
			const struct sockaddr_storage *y)
{
	switch (x->ss_family) {
	case AF_INET: {
		struct sockaddr_in *sinx = (struct sockaddr_in *)x;
		struct sockaddr_in *siny = (struct sockaddr_in *)y;
		if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
			return 0;
		if (sinx->sin_port != siny->sin_port)
			return 0;
		break;
	}
	case AF_INET6: {
		struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
		struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
		if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
			return 0;
		if (sinx->sin6_port != siny->sin6_port)
			return 0;
		break;
	}
	default:
		return 0;
	}
	return 1;
}

static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
			  struct sockaddr *sa_out, bool try_new_addr,
			  unsigned int *mark)
{
	struct sockaddr_storage sas;
	struct dlm_node_addr *na;

	if (!dlm_local_count)
		return -1;

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (na && na->addr_count) {
		memcpy(&sas, na->addr[na->curr_addr_index],
		       sizeof(struct sockaddr_storage));

		if (try_new_addr) {
			na->curr_addr_index++;
			if (na->curr_addr_index == na->addr_count)
				na->curr_addr_index = 0;
		}
	}
	spin_unlock(&dlm_node_addrs_spin);

	if (!na)
		return -EEXIST;

	if (!na->addr_count)
		return -ENOENT;

	*mark = na->mark;

	if (sas_out)
		memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));

	if (!sa_out)
		return 0;

	if (dlm_local_addr[0]->ss_family == AF_INET) {
		struct sockaddr_in *in4 = (struct sockaddr_in *) &sas;
		struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
	} else {
		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas;
		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out;
		ret6->sin6_addr = in6->sin6_addr;
	}

	return 0;
}

static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid,
			  unsigned int *mark)
{
	struct dlm_node_addr *na;
	int rv = -EEXIST;
	int addr_i;

	spin_lock(&dlm_node_addrs_spin);
	list_for_each_entry(na, &dlm_node_addrs, list) {
		if (!na->addr_count)
			continue;

		for (addr_i = 0; addr_i < na->addr_count; addr_i++) {
			if (addr_compare(na->addr[addr_i], addr)) {
				*nodeid = na->nodeid;
				*mark = na->mark;
				rv = 0;
				goto unlock;
			}
		}
	}
unlock:
	spin_unlock(&dlm_node_addrs_spin);
	return rv;
}

/* caller must hold the dlm_node_addrs_spin lock */
static bool dlm_lowcomms_na_has_addr(const struct dlm_node_addr *na,
				     const struct sockaddr_storage *addr)
{
	int i;

	for (i = 0; i < na->addr_count; i++) {
		if (addr_compare(na->addr[i], addr))
			return true;
	}

	return false;
}

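/* Register an address for @nodeid. The first address creates the node
 * entry (with the configured default skb mark); further calls append
 * extra addresses, up to DLM_MAX_ADDR_COUNT. Returns -EEXIST if the
 * address is already known and -ENOSPC if the address table is full.
 */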
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
{
	struct sockaddr_storage *new_addr;
	struct dlm_node_addr *new_node, *na;
	bool ret;

	new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS);
	if (!new_node)
		return -ENOMEM;

	new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS);
	if (!new_addr) {
		kfree(new_node);
		return -ENOMEM;
	}

	memcpy(new_addr, addr, len);

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (!na) {
		new_node->nodeid = nodeid;
		new_node->addr[0] = new_addr;
		new_node->addr_count = 1;
		new_node->mark = dlm_config.ci_mark;
		list_add(&new_node->list, &dlm_node_addrs);
		spin_unlock(&dlm_node_addrs_spin);
		return 0;
	}

	ret = dlm_lowcomms_na_has_addr(na, addr);
	if (ret) {
		spin_unlock(&dlm_node_addrs_spin);
		kfree(new_addr);
		kfree(new_node);
		return -EEXIST;
	}

	if (na->addr_count >= DLM_MAX_ADDR_COUNT) {
		spin_unlock(&dlm_node_addrs_spin);
		kfree(new_addr);
		kfree(new_node);
		return -ENOSPC;
	}

	na->addr[na->addr_count++] = new_addr;
	spin_unlock(&dlm_node_addrs_spin);
	kfree(new_node);
	return 0;
}

/* Data available on socket, or the listening socket received a connect */
static void lowcomms_data_ready(struct sock *sk)
{
	struct connection *con;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
		queue_work(recv_workqueue, &con->rwork);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void lowcomms_listen_data_ready(struct sock *sk)
{
	queue_work(recv_workqueue, &listen_con.rwork);
}

static void lowcomms_write_space(struct sock *sk)
{
	struct connection *con;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (!con)
		goto out;

	if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
		log_print("successful connected to node %d", con->nodeid);
		queue_work(send_workqueue, &con->swork);
		goto out;
	}

	clear_bit(SOCK_NOSPACE, &con->sock->flags);

	if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
		con->sock->sk->sk_write_pending--;
		clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags);
	}

	queue_work(send_workqueue, &con->swork);
out:
	read_unlock_bh(&sk->sk_callback_lock);
}

static inline void lowcomms_connect_sock(struct connection *con)
{
	if (test_bit(CF_CLOSE, &con->flags))
		return;
	queue_work(send_workqueue, &con->swork);
	cond_resched();
}

static void lowcomms_state_change(struct sock *sk)
{
	/* SCTP layer is not calling sk_data_ready when the connection
	 * is done, so we catch the signal through here. Also, it
	 * doesn't switch socket state when entering shutdown, so we
	 * skip the write in that case.
	 */
	if (sk->sk_shutdown) {
		if (sk->sk_shutdown == RCV_SHUTDOWN)
			lowcomms_data_ready(sk);
	} else if (sk->sk_state == TCP_ESTABLISHED) {
		lowcomms_write_space(sk);
	}
}

int dlm_lowcomms_connect_node(int nodeid)
{
	struct connection *con;
	int idx;

	if (nodeid == dlm_our_nodeid())
		return 0;

	idx = srcu_read_lock(&connections_srcu);
	con = nodeid2con(nodeid, GFP_NOFS);
	if (!con) {
		srcu_read_unlock(&connections_srcu, idx);
		return -ENOMEM;
	}

	lowcomms_connect_sock(con);
	srcu_read_unlock(&connections_srcu, idx);

	return 0;
}

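/* Update the skb mark used for future sockets to @nodeid; sockets that
 * already exist keep the mark they were created with.
 */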
int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
{
	struct dlm_node_addr *na;

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (!na) {
		spin_unlock(&dlm_node_addrs_spin);
		return -ENOENT;
	}

	na->mark = mark;
	spin_unlock(&dlm_node_addrs_spin);

	return 0;
}

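/* sk_error_report hook: log the peer we failed to talk to, then arrange
 * for the sending side of the connection to reconnect (delayed first in
 * the ECONNREFUSED case). The saved original report handler runs last.
 */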
static void lowcomms_error_report(struct sock *sk)
{
	struct connection *con;
	struct sockaddr_storage saddr;
	void (*orig_report)(struct sock *) = NULL;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (con == NULL)
		goto out;

	orig_report = listen_sock.sk_error_report;
	if (con->sock == NULL ||
	    kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
				   "sending to node %d, port %d, "
				   "sk_err=%d/%d\n", dlm_our_nodeid(),
				   con->nodeid, dlm_config.ci_tcp_port,
				   sk->sk_err, sk->sk_err_soft);
	} else if (saddr.ss_family == AF_INET) {
		struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;

		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
				   "sending to node %d at %pI4, port %d, "
				   "sk_err=%d/%d\n", dlm_our_nodeid(),
				   con->nodeid, &sin4->sin_addr.s_addr,
				   dlm_config.ci_tcp_port, sk->sk_err,
				   sk->sk_err_soft);
	} else {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;

		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
				   "sending to node %d at %u.%u.%u.%u, "
				   "port %d, sk_err=%d/%d\n", dlm_our_nodeid(),
				   con->nodeid, sin6->sin6_addr.s6_addr32[0],
				   sin6->sin6_addr.s6_addr32[1],
				   sin6->sin6_addr.s6_addr32[2],
				   sin6->sin6_addr.s6_addr32[3],
				   dlm_config.ci_tcp_port, sk->sk_err,
				   sk->sk_err_soft);
	}

	/* the handling below is only done on the sendcon */
	if (test_bit(CF_IS_OTHERCON, &con->flags))
		con = con->sendcon;

	switch (sk->sk_err) {
	case ECONNREFUSED:
		set_bit(CF_DELAY_CONNECT, &con->flags);
		break;
	default:
		break;
	}

	if (!test_and_set_bit(CF_RECONNECT, &con->flags))
		queue_work(send_workqueue, &con->swork);

out:
	read_unlock_bh(&sk->sk_callback_lock);
	if (orig_report)
		orig_report(sk);
}

/* Note: sk_callback_lock must be locked before calling this function. */
static void save_listen_callbacks(struct socket *sock)
{
	struct sock *sk = sock->sk;

	listen_sock.sk_data_ready = sk->sk_data_ready;
	listen_sock.sk_state_change = sk->sk_state_change;
	listen_sock.sk_write_space = sk->sk_write_space;
	listen_sock.sk_error_report = sk->sk_error_report;
}

static void restore_callbacks(struct socket *sock)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = NULL;
	sk->sk_data_ready = listen_sock.sk_data_ready;
	sk->sk_state_change = listen_sock.sk_state_change;
	sk->sk_write_space = listen_sock.sk_write_space;
	sk->sk_error_report = listen_sock.sk_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

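/* Attach the listening socket: save the stock callbacks so they can be
 * restored later, then point sk_data_ready at our accept handler.
 */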
static void add_listen_sock(struct socket *sock, struct listen_connection *con)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	save_listen_callbacks(sock);
	con->sock = sock;

	sk->sk_user_data = con;
	sk->sk_allocation = GFP_NOFS;
	/* Install a data_ready callback */
	sk->sk_data_ready = lowcomms_listen_data_ready;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Make a socket active */
static void add_sock(struct socket *sock, struct connection *con)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	con->sock = sock;

	sk->sk_user_data = con;
	/* Install a data_ready callback */
	sk->sk_data_ready = lowcomms_data_ready;
	sk->sk_write_space = lowcomms_write_space;
	sk->sk_state_change = lowcomms_state_change;
	sk->sk_allocation = GFP_NOFS;
	sk->sk_error_report = lowcomms_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Add the port number to an IPv6 or 4 sockaddr and return the address
   length */
static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
			  int *addr_len)
{
	saddr->ss_family = dlm_local_addr[0]->ss_family;
	if (saddr->ss_family == AF_INET) {
		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
		in4_addr->sin_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in);
		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
	} else {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
		in6_addr->sin6_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in6);
	}
	memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len);
}

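/* Release a socket we installed callbacks on, restoring the originals
 * first; safe to call when *sock is already NULL.
 */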
static void dlm_close_sock(struct socket **sock)
{
	if (*sock) {
		restore_callbacks(*sock);
		sock_release(*sock);
		*sock = NULL;
	}
}

/* Close a remote connection and tidy up */
static void close_connection(struct connection *con, bool and_other,
			     bool tx, bool rx)
{
	bool closing = test_and_set_bit(CF_CLOSING, &con->flags);

	if (tx && !closing && cancel_work_sync(&con->swork)) {
		log_print("canceled swork for node %d", con->nodeid);
		clear_bit(CF_WRITE_PENDING, &con->flags);
	}
	if (rx && !closing && cancel_work_sync(&con->rwork)) {
		log_print("canceled rwork for node %d", con->nodeid);
		clear_bit(CF_READ_PENDING, &con->flags);
	}

	mutex_lock(&con->sock_mutex);
	dlm_close_sock(&con->sock);

	if (con->othercon && and_other) {
		/* Will only re-enter once. */
		close_connection(con->othercon, false, tx, rx);
	}

	con->rx_leftover = 0;
	con->retries = 0;
	clear_bit(CF_CONNECTED, &con->flags);
	clear_bit(CF_DELAY_CONNECT, &con->flags);
	clear_bit(CF_RECONNECT, &con->flags);
	clear_bit(CF_EOF, &con->flags);
	mutex_unlock(&con->sock_mutex);
	clear_bit(CF_CLOSING, &con->flags);
}

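/* Graceful shutdown: flush any queued sends, half-close our side with
 * SHUT_WR, then wait for the peer's EOF (receive_from_sock() clears
 * CF_SHUTDOWN and wakes shutdown_wait when it arrives). If the shutdown
 * call fails or the peer does not answer within DLM_SHUTDOWN_WAIT_TIMEOUT,
 * fall back to a forced close.
 */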
static void shutdown_connection(struct connection *con)
{
	int ret;

	flush_work(&con->swork);

	mutex_lock(&con->sock_mutex);
	/* nothing to shutdown */
	if (!con->sock) {
		mutex_unlock(&con->sock_mutex);
		return;
	}

	set_bit(CF_SHUTDOWN, &con->flags);
	ret = kernel_sock_shutdown(con->sock, SHUT_WR);
	mutex_unlock(&con->sock_mutex);
	if (ret) {
		log_print("Connection %p failed to shutdown: %d will force close",
			  con, ret);
		goto force_close;
	} else {
		ret = wait_event_timeout(con->shutdown_wait,
					 !test_bit(CF_SHUTDOWN, &con->flags),
					 DLM_SHUTDOWN_WAIT_TIMEOUT);
		if (ret == 0) {
			log_print("Connection %p shutdown timed out, will force close",
				  con);
			goto force_close;
		}
	}

	return;

force_close:
	clear_bit(CF_SHUTDOWN, &con->flags);
	close_connection(con, false, true, true);
}

static void dlm_tcp_shutdown(struct connection *con)
{
	if (con->othercon)
		shutdown_connection(con->othercon);
	shutdown_connection(con);
}

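/* Resize the receive buffer to the (possibly changed) configured size,
 * carrying over any unprocessed leftover bytes from the old buffer.
 */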
static int con_realloc_receive_buf(struct connection *con, int newlen)
{
	unsigned char *newbuf;

	newbuf = kmalloc(newlen, GFP_NOFS);
	if (!newbuf)
		return -ENOMEM;

	/* copy any leftover from last receive */
	if (con->rx_leftover)
		memmove(newbuf, con->rx_buf, con->rx_leftover);

	/* swap to new buffer space */
	kfree(con->rx_buf);
	con->rx_buflen = newlen;
	con->rx_buf = newbuf;

	return 0;
}

/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
	int call_again_soon = 0;
	struct msghdr msg;
	struct kvec iov;
	int ret, buflen;

	mutex_lock(&con->sock_mutex);

	if (con->sock == NULL) {
		ret = -EAGAIN;
		goto out_close;
	}

	/* realloc if we get a new buffer size to read out */
	buflen = dlm_config.ci_buffer_size;
	if (con->rx_buflen != buflen && con->rx_leftover <= buflen) {
		ret = con_realloc_receive_buf(con, buflen);
		if (ret < 0)
			goto out_resched;
	}

	/* calculate new buffer parameters regarding the last receive and
	 * possible leftover bytes
	 */
	iov.iov_base = con->rx_buf + con->rx_leftover;
	iov.iov_len = con->rx_buflen - con->rx_leftover;

	memset(&msg, 0, sizeof(msg));
	msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
	ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
			     msg.msg_flags);
	if (ret <= 0)
		goto out_close;
	else if (ret == iov.iov_len)
		call_again_soon = 1;

	/* new buflen according to the bytes read plus the leftover from
	 * the last receive
	 */
	buflen = ret + con->rx_leftover;
	ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
	if (ret < 0)
		goto out_close;

	/* calculate the leftover bytes from processing and move them to
	 * the beginning of the receive buffer, so on the next receive we
	 * have the full message at the start address of the receive buffer.
	 */
	con->rx_leftover = buflen - ret;
	if (con->rx_leftover) {
		memmove(con->rx_buf, con->rx_buf + ret,
			con->rx_leftover);
		call_again_soon = true;
	}

	if (call_again_soon)
		goto out_resched;

	mutex_unlock(&con->sock_mutex);
	return 0;

out_resched:
	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
		queue_work(recv_workqueue, &con->rwork);
	mutex_unlock(&con->sock_mutex);
	return -EAGAIN;

out_close:
	if (ret == 0) {
		log_print("connection %p got EOF from %d",
			  con, con->nodeid);

		if (con->eof_condition && con->eof_condition(con)) {
			set_bit(CF_EOF, &con->flags);
			mutex_unlock(&con->sock_mutex);
		} else {
			mutex_unlock(&con->sock_mutex);
			close_connection(con, false, true, false);

			/* handling for tcp shutdown */
			clear_bit(CF_SHUTDOWN, &con->flags);
			wake_up(&con->shutdown_wait);
		}

		/* signal the receive worker to break out */
		ret = -1;
	} else {
		mutex_unlock(&con->sock_mutex);
	}
	return ret;
}

/* Listening socket is busy, accept a connection */
static int accept_from_sock(struct listen_connection *con)
{
	int result;
	struct sockaddr_storage peeraddr;
	struct socket *newsock;
	int len, idx;
	int nodeid;
	struct connection *newcon;
	struct connection *addcon;
	unsigned int mark;

	if (!dlm_allow_conn) {
		return -1;
	}

	if (!con->sock)
		return -ENOTCONN;

	result = kernel_accept(con->sock, &newsock, O_NONBLOCK);
	if (result < 0)
		goto accept_err;

	/* Get the connected socket's peer */
	memset(&peeraddr, 0, sizeof(peeraddr));
	len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
	if (len < 0) {
		result = -ECONNABORTED;
		goto accept_err;
	}

	/* Get the new node's NODEID */
	make_sockaddr(&peeraddr, 0, &len);
	if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) {
		unsigned char *b = (unsigned char *)&peeraddr;
		log_print("connect from non cluster node");
		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
				     b, sizeof(struct sockaddr_storage));
		sock_release(newsock);
		return -1;
	}

	log_print("got connection from %d", nodeid);

	/* Check to see if we already have a connection to this node. This
	 * could happen if the two nodes initiate a connection at roughly
	 * the same time and the connections cross on the wire.
	 * In this case we store the incoming one in "othercon"
	 */
	idx = srcu_read_lock(&connections_srcu);
	newcon = nodeid2con(nodeid, GFP_NOFS);
	if (!newcon) {
		srcu_read_unlock(&connections_srcu, idx);
		result = -ENOMEM;
		goto accept_err;
	}

	sock_set_mark(newsock->sk, mark);

	mutex_lock(&newcon->sock_mutex);
	if (newcon->sock) {
		struct connection *othercon = newcon->othercon;

		if (!othercon) {
			othercon = kzalloc(sizeof(*othercon), GFP_NOFS);
			if (!othercon) {
				log_print("failed to allocate incoming socket");
				mutex_unlock(&newcon->sock_mutex);
				srcu_read_unlock(&connections_srcu, idx);
				result = -ENOMEM;
				goto accept_err;
			}

			result = dlm_con_init(othercon, nodeid);
			if (result < 0) {
				kfree(othercon);
				mutex_unlock(&newcon->sock_mutex);
				srcu_read_unlock(&connections_srcu, idx);
				goto accept_err;
			}

			lockdep_set_subclass(&othercon->sock_mutex, 1);
			set_bit(CF_IS_OTHERCON, &othercon->flags);
			newcon->othercon = othercon;
			othercon->sendcon = newcon;
		} else {
			/* close other sock con if we have something new */
			close_connection(othercon, false, true, false);
		}

		mutex_lock(&othercon->sock_mutex);
		add_sock(newsock, othercon);
		addcon = othercon;
		mutex_unlock(&othercon->sock_mutex);
	}
	else {
		/* accept copies the sk after we've saved the callbacks, so we
		   don't want to save them a second time or comm errors will
		   result in calling sk_error_report recursively. */
		add_sock(newsock, newcon);
		addcon = newcon;
	}

	set_bit(CF_CONNECTED, &addcon->flags);
	mutex_unlock(&newcon->sock_mutex);

	/*
	 * Add it to the active queue in case we got data
	 * between processing the accept and adding the socket
	 * to the read_sockets list
	 */
	if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
		queue_work(recv_workqueue, &addcon->rwork);

	srcu_read_unlock(&connections_srcu, idx);

	return 0;

accept_err:
	if (newsock)
		sock_release(newsock);

	if (result != -EAGAIN)
		log_print("error accepting connection from node: %d", result);
	return result;
}

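/* Lifetimes are reference counted: each dlm_msg holds a ref on its
 * writequeue_entry, and the entry's page is freed only when the last
 * such ref is dropped via dlm_page_release().
 */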
Alexander Aring8f2dc782021-05-21 15:08:42 -04001036static void dlm_page_release(struct kref *kref)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001037{
Alexander Aring8f2dc782021-05-21 15:08:42 -04001038 struct writequeue_entry *e = container_of(kref, struct writequeue_entry,
1039 ref);
1040
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001041 __free_page(e->page);
1042 kfree(e);
1043}
1044
Alexander Aring8f2dc782021-05-21 15:08:42 -04001045static void dlm_msg_release(struct kref *kref)
1046{
1047 struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);
1048
1049 kref_put(&msg->entry->ref, dlm_page_release);
1050 kfree(msg);
1051}
1052
1053static void free_entry(struct writequeue_entry *e)
1054{
1055 struct dlm_msg *msg, *tmp;
1056
1057 list_for_each_entry_safe(msg, tmp, &e->msgs, list) {
1058 list_del(&msg->list);
1059 kref_put(&msg->ref, dlm_msg_release);
1060 }
1061
1062 list_del(&e->list);
1063 atomic_dec(&e->con->writequeue_cnt);
1064 kref_put(&e->ref, dlm_page_release);
1065}
1066
Mike Christie5d689872013-06-14 04:56:13 -05001067/*
1068 * writequeue_entry_complete - try to delete and free write queue entry
1069 * @e: write queue entry to try to delete
1070 * @completed: bytes completed
1071 *
1072 * writequeue_lock must be held.
1073 */
1074static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
1075{
1076 e->offset += completed;
1077 e->len -= completed;
1078
Alexander Aring8f2dc782021-05-21 15:08:42 -04001079 if (e->len == 0 && e->users == 0)
Mike Christie5d689872013-06-14 04:56:13 -05001080 free_entry(e);
Mike Christie5d689872013-06-14 04:56:13 -05001081}
1082
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001083/*
1084 * sctp_bind_addrs - bind a SCTP socket to all our addresses
1085 */
Alexander Aring13004e82020-11-02 20:04:24 -05001086static int sctp_bind_addrs(struct socket *sock, uint16_t port)
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001087{
1088 struct sockaddr_storage localaddr;
Christoph Hellwigc0425a42020-05-29 14:09:42 +02001089 struct sockaddr *addr = (struct sockaddr *)&localaddr;
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001090 int i, addr_len, result = 0;
1091
1092 for (i = 0; i < dlm_local_count; i++) {
1093 memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
1094 make_sockaddr(&localaddr, port, &addr_len);
1095
1096 if (!i)
Alexander Aring13004e82020-11-02 20:04:24 -05001097 result = kernel_bind(sock, addr, addr_len);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001098 else
Alexander Aring13004e82020-11-02 20:04:24 -05001099 result = sock_bind_add(sock->sk, addr, addr_len);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001100
1101 if (result < 0) {
1102 log_print("Can't bind to %d addr number %d, %d.\n",
1103 port, i + 1, result);
1104 break;
1105 }
1106 }
1107 return result;
1108}
1109
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001110/* Initiate an SCTP association.
1111 This is a special case of send_to_sock() in that we don't yet have a
1112 peeled-off socket for this association, so we use the listening socket
1113 and add the primary IP address of the remote node.
1114 */
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001115static void sctp_connect_to_sock(struct connection *con)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001116{
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001117 struct sockaddr_storage daddr;
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001118 int result;
1119 int addr_len;
1120 struct socket *sock;
Alexander Aring9c9f1682020-06-26 13:26:50 -04001121 unsigned int mark;
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001122
Mike Christie5d689872013-06-14 04:56:13 -05001123 mutex_lock(&con->sock_mutex);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001124
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001125 /* Some odd races can cause double-connects, ignore them */
1126 if (con->retries++ > MAX_CONNECT_RETRIES)
1127 goto out;
1128
1129 if (con->sock) {
1130 log_print("node %d already connected.", con->nodeid);
1131 goto out;
1132 }
1133
1134 memset(&daddr, 0, sizeof(daddr));
Alexander Aringe125fbe2021-03-01 17:05:09 -05001135 result = nodeid_to_addr(con->nodeid, &daddr, NULL, true, &mark);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001136 if (result < 0) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001137 log_print("no address for nodeid %d", con->nodeid);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001138 goto out;
David Teigland04bedd72009-09-18 14:31:47 -05001139 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001140
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001141 /* Create a socket to communicate with */
1142 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
1143 SOCK_STREAM, IPPROTO_SCTP, &sock);
1144 if (result < 0)
1145 goto socket_err;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001146
Alexander Aring9c9f1682020-06-26 13:26:50 -04001147 sock_set_mark(sock->sk, mark);
1148
tsutomu.owa@toshiba.co.jp988419a2017-09-12 08:55:32 +00001149 add_sock(sock, con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001150
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001151 /* Bind to all addresses. */
Alexander Aring13004e82020-11-02 20:04:24 -05001152 if (sctp_bind_addrs(con->sock, 0))
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001153 goto bind_err;
Mike Christie98e1b602013-06-14 04:56:12 -05001154
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001155 make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001156
Alexander Aring2df6b762021-05-21 15:08:34 -04001157 log_print_ratelimited("connecting to %d", con->nodeid);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001158
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001159 /* Turn off Nagle's algorithm */
Christoph Hellwig40ef92c2020-05-29 14:09:40 +02001160 sctp_sock_set_nodelay(sock->sk);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001161
Gang Hef706d832018-05-02 10:28:35 -05001162 /*
1163 * Make sock->ops->connect() function return in specified time,
1164 * since O_NONBLOCK argument in connect() function does not work here,
1165 * then, we should restore the default value of this attribute.
1166 */
Christoph Hellwig76ee0782020-05-28 07:12:12 +02001167 sock_set_sndtimeo(sock->sk, 5);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001168 result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
Gang Heda3627c2018-05-29 11:09:22 +08001169 0);
Christoph Hellwig76ee0782020-05-28 07:12:12 +02001170 sock_set_sndtimeo(sock->sk, 0);
Gang Hef706d832018-05-02 10:28:35 -05001171
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001172 if (result == -EINPROGRESS)
1173 result = 0;
Alexander Aring19633c72020-11-02 20:04:20 -05001174 if (result == 0) {
1175 if (!test_and_set_bit(CF_CONNECTED, &con->flags))
1176 log_print("successful connected to node %d", con->nodeid);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001177 goto out;
Alexander Aring19633c72020-11-02 20:04:20 -05001178 }
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001179
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001180bind_err:
1181 con->sock = NULL;
1182 sock_release(sock);
1183
1184socket_err:
1185 /*
1186 * Some errors are fatal and this list might need adjusting. For other
1187 * errors we try again until the max number of retries is reached.
1188 */
1189 if (result != -EHOSTUNREACH &&
1190 result != -ENETUNREACH &&
1191 result != -ENETDOWN &&
1192 result != -EINVAL &&
1193 result != -EPROTONOSUPPORT) {
1194 log_print("connect %d try %d error %d", con->nodeid,
1195 con->retries, result);
1196 mutex_unlock(&con->sock_mutex);
1197 msleep(1000);
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001198 lowcomms_connect_sock(con);
1199 return;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001200 }
Mike Christie5d689872013-06-14 04:56:13 -05001201
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001202out:
Mike Christie5d689872013-06-14 04:56:13 -05001203 mutex_unlock(&con->sock_mutex);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001204}
1205
1206/* Connect a new socket to its peer */
1207static void tcp_connect_to_sock(struct connection *con)
1208{
Lon Hohberger6bd8fed2007-10-25 18:51:54 -04001209 struct sockaddr_storage saddr, src_addr;
Alexander Aringe125fbe2021-03-01 17:05:09 -05001210 unsigned int mark;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001211 int addr_len;
Casey Dahlina89d63a2009-07-14 12:17:51 -05001212 struct socket *sock = NULL;
David Teigland36b71a82012-07-26 12:44:30 -05001213 int result;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001214
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001215 mutex_lock(&con->sock_mutex);
1216 if (con->retries++ > MAX_CONNECT_RETRIES)
1217 goto out;
1218
1219 /* Some odd races can cause double-connects, ignore them */
David Teigland36b71a82012-07-26 12:44:30 -05001220 if (con->sock)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001221 goto out;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001222
1223 /* Create a socket to communicate with */
Eric W. Biedermaneeb1bd52015-05-08 21:08:05 -05001224 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
1225 SOCK_STREAM, IPPROTO_TCP, &sock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001226 if (result < 0)
1227 goto out_err;
1228
1229 memset(&saddr, 0, sizeof(saddr));
Alexander Aringe125fbe2021-03-01 17:05:09 -05001230 result = nodeid_to_addr(con->nodeid, &saddr, NULL, false, &mark);
David Teigland36b71a82012-07-26 12:44:30 -05001231 if (result < 0) {
1232 log_print("no address for nodeid %d", con->nodeid);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001233 goto out_err;
David Teigland36b71a82012-07-26 12:44:30 -05001234 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001235
Alexander Aringe125fbe2021-03-01 17:05:09 -05001236 sock_set_mark(sock->sk, mark);
1237
tsutomu.owa@toshiba.co.jp988419a2017-09-12 08:55:32 +00001238 add_sock(sock, con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001239
Lon Hohberger6bd8fed2007-10-25 18:51:54 -04001240 /* Bind to our cluster-known address connecting to avoid
1241 routing problems */
1242 memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
1243 make_sockaddr(&src_addr, 0, &addr_len);
1244 result = sock->ops->bind(sock, (struct sockaddr *) &src_addr,
1245 addr_len);
1246 if (result < 0) {
1247 log_print("could not bind for connect: %d", result);
1248 /* This *may* not indicate a critical error */
1249 }
1250
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001251 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
1252
Alexander Aring2df6b762021-05-21 15:08:34 -04001253 log_print_ratelimited("connecting to %d", con->nodeid);
David Teiglandcb2d45d2010-11-12 11:12:55 -06001254
1255 /* Turn off Nagle's algorithm */
Christoph Hellwig12abc5e2020-05-28 07:12:19 +02001256 tcp_sock_set_nodelay(sock->sk);
David Teiglandcb2d45d2010-11-12 11:12:55 -06001257
David Teigland36b71a82012-07-26 12:44:30 -05001258 result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001259 O_NONBLOCK);
1260 if (result == -EINPROGRESS)
1261 result = 0;
1262 if (result == 0)
1263 goto out;
1264
1265out_err:
1266 if (con->sock) {
1267 sock_release(con->sock);
1268 con->sock = NULL;
Casey Dahlina89d63a2009-07-14 12:17:51 -05001269 } else if (sock) {
1270 sock_release(sock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001271 }
1272 /*
1273 * Some errors are fatal and this list might need adjusting. For other
1274 * errors we try again until the max number of retries is reached.
1275 */
David Teigland36b71a82012-07-26 12:44:30 -05001276 if (result != -EHOSTUNREACH &&
1277 result != -ENETUNREACH &&
1278 result != -ENETDOWN &&
1279 result != -EINVAL &&
1280 result != -EPROTONOSUPPORT) {
1281 log_print("connect %d try %d error %d", con->nodeid,
1282 con->retries, result);
1283 mutex_unlock(&con->sock_mutex);
1284 msleep(1000);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001285 lowcomms_connect_sock(con);
David Teigland36b71a82012-07-26 12:44:30 -05001286 return;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001287 }
1288out:
1289 mutex_unlock(&con->sock_mutex);
1290 return;
1291}
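
/* Both connect paths issue the connect non-blocking: -EINPROGRESS means
 * the handshake merely continues in the background, and completion is
 * signalled through the socket callbacks installed by add_sock().  A
 * condensed sketch of the same idiom using the kernel_connect() helper
 * (our sketch, not this file's code):
 */
static int dlm_nonblock_connect(struct socket *sock, struct sockaddr *addr,
				int addr_len)
{
	int result;

	result = kernel_connect(sock, addr, addr_len, O_NONBLOCK);
	if (result == -EINPROGRESS)
		result = 0;	/* not an error: connect completes asynchronously */
	return result;
}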
1292
Alexander Aringd11ccd42020-11-02 20:04:25 -05001293/* On error caller must run dlm_close_sock() for the
1294 * listen connection socket.
1295 */
1296static int tcp_create_listen_sock(struct listen_connection *con,
1297 struct sockaddr_storage *saddr)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001298{
1299 struct socket *sock = NULL;
1300 int result = 0;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001301 int addr_len;
1302
1303 if (dlm_local_addr[0]->ss_family == AF_INET)
1304 addr_len = sizeof(struct sockaddr_in);
1305 else
1306 addr_len = sizeof(struct sockaddr_in6);
1307
1308 /* Create a socket to communicate with */
Eric W. Biedermaneeb1bd52015-05-08 21:08:05 -05001309 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
1310 SOCK_STREAM, IPPROTO_TCP, &sock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001311 if (result < 0) {
David Teigland617e82e2007-04-26 13:46:49 -05001312 log_print("Can't create listening comms socket");
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001313 goto create_out;
1314 }
1315
Alexander Aringa5b7ab62020-06-26 13:26:49 -04001316 sock_set_mark(sock->sk, dlm_config.ci_mark);
1317
David Teiglandcb2d45d2010-11-12 11:12:55 -06001318 /* Turn off Nagle's algorithm */
Christoph Hellwig12abc5e2020-05-28 07:12:19 +02001319 tcp_sock_set_nodelay(sock->sk);
David Teiglandcb2d45d2010-11-12 11:12:55 -06001320
Christoph Hellwigb58f0e82020-05-28 07:12:09 +02001321 sock_set_reuseaddr(sock->sk);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001322
Alexander Aringd11ccd42020-11-02 20:04:25 -05001323 add_listen_sock(sock, con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001324
1325 /* Bind to our port */
1326 make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
1327 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
1328 if (result < 0) {
David Teigland617e82e2007-04-26 13:46:49 -05001329 log_print("Can't bind to port %d", dlm_config.ci_tcp_port);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001330 goto create_out;
1331 }
Christoph Hellwigce3d9542020-05-28 07:12:15 +02001332 sock_set_keepalive(sock->sk);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001333
1334 result = sock->ops->listen(sock, 5);
1335 if (result < 0) {
David Teigland617e82e2007-04-26 13:46:49 -05001336 log_print("Can't listen on port %d", dlm_config.ci_tcp_port);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001337 goto create_out;
1338 }
1339
Alexander Aringd11ccd42020-11-02 20:04:25 -05001340 return 0;
1341
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001342create_out:
Alexander Aringd11ccd42020-11-02 20:04:25 -05001343 return result;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001344}
1345
1346/* Get local addresses */
1347static void init_local(void)
1348{
1349 struct sockaddr_storage sas, *addr;
1350 int i;
1351
Patrick Caulfield30d3a232007-04-23 16:26:21 +01001352 dlm_local_count = 0;
David Teigland1b189b82012-03-21 09:18:34 -05001353 for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001354 if (dlm_our_addr(&sas, i))
1355 break;
1356
Amitoj Kaur Chawla5c93f562016-06-23 10:22:01 +05301357 addr = kmemdup(&sas, sizeof(*addr), GFP_NOFS);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001358 if (!addr)
1359 break;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001360 dlm_local_addr[dlm_local_count++] = addr;
1361 }
1362}
1363
Alexander Aring043697f2020-08-27 15:02:50 -04001364static void deinit_local(void)
1365{
1366 int i;
1367
1368 for (i = 0; i < dlm_local_count; i++)
1369 kfree(dlm_local_addr[i]);
1370}
1371
Alexander Aringd11ccd42020-11-02 20:04:25 -05001372/* Initialise SCTP socket and bind to all interfaces
1373 * On error caller must run dlm_close_sock() for the
1374 * listen connection socket.
1375 */
1376static int sctp_listen_for_all(struct listen_connection *con)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001377{
1378 struct socket *sock = NULL;
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001379 int result = -EINVAL;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001380
1381 log_print("Using SCTP for communications");
1382
Eric W. Biedermaneeb1bd52015-05-08 21:08:05 -05001383 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001384 SOCK_STREAM, IPPROTO_SCTP, &sock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001385 if (result < 0) {
1386 log_print("Can't create comms socket, check SCTP is loaded");
1387 goto out;
1388 }
1389
Christoph Hellwig26cfabf2020-05-28 07:12:16 +02001390 sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
Alexander Aringa5b7ab62020-06-26 13:26:49 -04001391 sock_set_mark(sock->sk, dlm_config.ci_mark);
Christoph Hellwig40ef92c2020-05-29 14:09:40 +02001392 sctp_sock_set_nodelay(sock->sk);
Mike Christie86e92ad2013-06-14 04:56:14 -05001393
Alexander Aringd11ccd42020-11-02 20:04:25 -05001394 add_listen_sock(sock, con);
Bob Petersonb81171c2016-02-05 14:39:02 -05001395
Marcelo Ricardo Leitneree44b4b2015-08-11 19:22:23 -03001396 /* Bind to all addresses. */
Alexander Aringd11ccd42020-11-02 20:04:25 -05001397 result = sctp_bind_addrs(con->sock, dlm_config.ci_tcp_port);
1398 if (result < 0)
1399 goto out;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001400
1401 result = sock->ops->listen(sock, 5);
1402 if (result < 0) {
1403 log_print("Can't set socket listening");
Alexander Aringd11ccd42020-11-02 20:04:25 -05001404 goto out;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001405 }
1406
1407 return 0;
1408
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001409out:
1410 return result;
1411}
1412
1413static int tcp_listen_for_all(void)
1414{
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001415 /* We don't support multi-homed hosts */
Alexander Aring1a26bfa2020-11-02 20:04:26 -05001416 if (dlm_local_count > 1) {
David Teigland617e82e2007-04-26 13:46:49 -05001417 log_print("TCP protocol can't handle multi-homed hosts, "
1418 "try SCTP");
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001419 return -EINVAL;
1420 }
1421
1422 log_print("Using TCP for communications");
1423
Alexander Aringd11ccd42020-11-02 20:04:25 -05001424 return tcp_create_listen_sock(&listen_con, dlm_local_addr[0]);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001425}
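
/* Note the asymmetry with sctp_listen_for_all() above: sctp_bind_addrs()
 * binds the one listening socket to every address in dlm_local_addr[],
 * so SCTP can make use of a multi-homed host, while the TCP path binds
 * only dlm_local_addr[0] and therefore rejects dlm_local_count > 1.
 */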
1426
1429static struct writequeue_entry *new_writequeue_entry(struct connection *con,
1430 gfp_t allocation)
1431{
1432 struct writequeue_entry *entry;
1433
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001434 entry = kzalloc(sizeof(*entry), allocation);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001435 if (!entry)
1436 return NULL;
1437
Alexander Aringe1a7cbc2021-03-01 17:05:15 -05001438 entry->page = alloc_page(allocation | __GFP_ZERO);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001439 if (!entry->page) {
1440 kfree(entry);
1441 return NULL;
1442 }
1443
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001444 entry->con = con;
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001445 entry->users = 1;
Alexander Aring8f2dc782021-05-21 15:08:42 -04001446 kref_init(&entry->ref);
1447 INIT_LIST_HEAD(&entry->msgs);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001448
1449 return entry;
1450}
1451
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001452static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
Alexander Aring8f2dc782021-05-21 15:08:42 -04001453 gfp_t allocation, char **ppc,
1454 void (*cb)(struct dlm_mhandle *mh),
1455 struct dlm_mhandle *mh)
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001456{
1457 struct writequeue_entry *e;
1458
1459 spin_lock(&con->writequeue_lock);
1460 if (!list_empty(&con->writequeue)) {
1461 e = list_last_entry(&con->writequeue, struct writequeue_entry, list);
1462 if (DLM_WQ_REMAIN_BYTES(e) >= len) {
Alexander Aring8f2dc782021-05-21 15:08:42 -04001463 kref_get(&e->ref);
1464
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001465 *ppc = page_address(e->page) + e->end;
Alexander Aring8f2dc782021-05-21 15:08:42 -04001466 if (cb)
1467 cb(mh);
1468
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001469 e->end += len;
1470 e->users++;
1471 spin_unlock(&con->writequeue_lock);
1472
1473 return e;
1474 }
1475 }
1476 spin_unlock(&con->writequeue_lock);
1477
1478 e = new_writequeue_entry(con, allocation);
1479 if (!e)
1480 return NULL;
1481
Alexander Aring8f2dc782021-05-21 15:08:42 -04001482 kref_get(&e->ref);
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001483 *ppc = page_address(e->page);
1484 e->end += len;
Alexander Aring8aa31cb2021-05-21 15:08:39 -04001485 atomic_inc(&con->writequeue_cnt);
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001486
1487 spin_lock(&con->writequeue_lock);
Alexander Aring8f2dc782021-05-21 15:08:42 -04001488 if (cb)
1489 cb(mh);
1490
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001491 list_add_tail(&e->list, &con->writequeue);
1492 spin_unlock(&con->writequeue_lock);
1493
1494 return e;
1495}
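
/* Worked example of the packing above (numbers are ours, assuming 4 KiB
 * pages): a first 1024-byte message allocates a fresh entry with
 * e->end = 1024 and e->users = 1; a second message of up to
 * DLM_WQ_REMAIN_BYTES(e) = 3072 bytes reuses the same page at offset
 * 1024 and bumps e->users to 2.  Each dlm_lowcomms_commit_msg() drops
 * one user, and the entry's final length is fixed once the count
 * reaches zero.
 */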
1496
Alexander Aring8f2dc782021-05-21 15:08:42 -04001497struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
1498 char **ppc, void (*cb)(struct dlm_mhandle *mh),
1499 struct dlm_mhandle *mh)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001500{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001501 struct writequeue_entry *e;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001502 struct connection *con;
Alexander Aring8f2dc782021-05-21 15:08:42 -04001503 struct dlm_msg *msg;
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001504 int idx;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001505
Alexander Aringc45674f2021-03-01 17:05:14 -05001506 if (len > DEFAULT_BUFFER_SIZE ||
1507 len < sizeof(struct dlm_header)) {
1508 BUILD_BUG_ON(PAGE_SIZE < DEFAULT_BUFFER_SIZE);
Alexander Aring692f51c2020-11-02 20:04:18 -05001509 log_print("failed to allocate a buffer of size %d", len);
Alexander Aringc45674f2021-03-01 17:05:14 -05001510 WARN_ON(1);
Alexander Aring692f51c2020-11-02 20:04:18 -05001511 return NULL;
1512 }
1513
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001514 idx = srcu_read_lock(&connections_srcu);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001515 con = nodeid2con(nodeid, allocation);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001516 if (!con) {
1517 srcu_read_unlock(&connections_srcu, idx);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001518 return NULL;
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001519 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001520
Alexander Aring8f2dc782021-05-21 15:08:42 -04001521 msg = kzalloc(sizeof(*msg), allocation);
1522 if (!msg) {
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001523 srcu_read_unlock(&connections_srcu, idx);
1524 return NULL;
1525 }
1526
Alexander Aring8f2dc782021-05-21 15:08:42 -04001527 kref_init(&msg->ref);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001528
Alexander Aring8f2dc782021-05-21 15:08:42 -04001529 e = new_wq_entry(con, len, allocation, ppc, cb, mh);
1530 if (!e) {
1531 srcu_read_unlock(&connections_srcu, idx);
1532 kfree(msg);
1533 return NULL;
1534 }
1535
1536 msg->ppc = *ppc;
1537 msg->len = len;
1538 msg->entry = e;
1539
1540 	/* we assume that on success dlm_lowcomms_commit_msg() must be called */
1541 msg->idx = idx;
1542
1543 return msg;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001544}
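
/* Sketch of the expected calling sequence for the allocation API above.
 * The caller and payload are hypothetical; the contract - write into
 * *ppc, commit on success, then drop the caller's reference - follows
 * from the srcu and kref handling above and below.
 */
static int example_send(int nodeid, const void *buf, int len)
{
	struct dlm_msg *msg;
	char *ppc;

	/* len must stay within [sizeof(struct dlm_header), DEFAULT_BUFFER_SIZE] */
	msg = dlm_lowcomms_new_msg(nodeid, len, GFP_NOFS, &ppc, NULL, NULL);
	if (!msg)
		return -ENOMEM;

	memcpy(ppc, buf, len);		/* fill the reserved writequeue space */
	dlm_lowcomms_commit_msg(msg);	/* queue the send, release the srcu idx */
	dlm_lowcomms_put_msg(msg);	/* drop the reference taken in new_msg */
	return 0;
}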
1545
Alexander Aring8f2dc782021-05-21 15:08:42 -04001546void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001547{
Alexander Aring8f2dc782021-05-21 15:08:42 -04001548 struct writequeue_entry *e = msg->entry;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001549 struct connection *con = e->con;
1550 int users;
1551
1552 spin_lock(&con->writequeue_lock);
Alexander Aring8f2dc782021-05-21 15:08:42 -04001553 kref_get(&msg->ref);
1554 list_add(&msg->list, &e->msgs);
1555
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001556 users = --e->users;
1557 if (users)
1558 goto out;
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001559
1560 e->len = DLM_WQ_LENGTH_BYTES(e);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001561 spin_unlock(&con->writequeue_lock);
1562
Bob Peterson01da24d2017-09-12 08:55:14 +00001563 queue_work(send_workqueue, &con->swork);
Alexander Aring8f2dc782021-05-21 15:08:42 -04001564 srcu_read_unlock(&connections_srcu, msg->idx);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001565 return;
1566
1567out:
1568 spin_unlock(&con->writequeue_lock);
Alexander Aring8f2dc782021-05-21 15:08:42 -04001569 srcu_read_unlock(&connections_srcu, msg->idx);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001570 return;
1571}
1572
Alexander Aring8f2dc782021-05-21 15:08:42 -04001573void dlm_lowcomms_put_msg(struct dlm_msg *msg)
1574{
1575 kref_put(&msg->ref, dlm_msg_release);
1576}
1577
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001578/* Send a message */
1579static void send_to_sock(struct connection *con)
1580{
1581 int ret = 0;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001582 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
1583 struct writequeue_entry *e;
1584 int len, offset;
Bob Petersonf92c8dd2010-11-12 11:15:20 -06001585 int count = 0;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001586
1587 mutex_lock(&con->sock_mutex);
1588 if (con->sock == NULL)
1589 goto out_connect;
1590
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001591 spin_lock(&con->writequeue_lock);
1592 for (;;) {
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001593 if (list_empty(&con->writequeue))
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001594 break;
1595
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001596 e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001597 len = e->len;
1598 offset = e->offset;
1599 BUG_ON(len == 0 && e->users == 0);
1600 spin_unlock(&con->writequeue_lock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001601
1602 ret = 0;
1603 if (len) {
Paolo Bonzini1329e3f2009-08-24 13:18:04 -05001604 ret = kernel_sendpage(con->sock, e->page, offset, len,
1605 msg_flags);
Patrick Caulfieldd66f8272007-09-14 08:49:21 +01001606 if (ret == -EAGAIN || ret == 0) {
David Millerb36930d2010-11-10 21:56:39 -08001607 if (ret == -EAGAIN &&
Eric Dumazet9cd3e072015-11-29 20:03:10 -08001608 test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
David Millerb36930d2010-11-10 21:56:39 -08001609 !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
1610 /* Notify TCP that we're limited by the
1611 * application window size.
1612 */
1613 set_bit(SOCK_NOSPACE, &con->sock->flags);
1614 con->sock->sk->sk_write_pending++;
1615 }
Patrick Caulfieldd66f8272007-09-14 08:49:21 +01001616 cond_resched();
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001617 goto out;
Ying Xue9c5bef52012-08-13 14:29:55 +08001618 } else if (ret < 0)
Alexander Aringba868d92021-05-21 15:08:37 -04001619 goto out;
Patrick Caulfieldd66f8272007-09-14 08:49:21 +01001620 }
Bob Petersonf92c8dd2010-11-12 11:15:20 -06001621
1622 /* Don't starve people filling buffers */
1623 if (++count >= MAX_SEND_MSG_COUNT) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001624 cond_resched();
Bob Petersonf92c8dd2010-11-12 11:15:20 -06001625 count = 0;
1626 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001627
1628 spin_lock(&con->writequeue_lock);
Mike Christie5d689872013-06-14 04:56:13 -05001629 writequeue_entry_complete(e, ret);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001630 }
1631 spin_unlock(&con->writequeue_lock);
Alexander Aring8aa31cb2021-05-21 15:08:39 -04001632
1633 /* close if we got EOF */
1634 if (test_and_clear_bit(CF_EOF, &con->flags)) {
1635 mutex_unlock(&con->sock_mutex);
1636 close_connection(con, false, false, true);
1637
1638 /* handling for tcp shutdown */
1639 clear_bit(CF_SHUTDOWN, &con->flags);
1640 wake_up(&con->shutdown_wait);
1641 } else {
1642 mutex_unlock(&con->sock_mutex);
1643 }
1644
1645 return;
1646
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001647out:
1648 mutex_unlock(&con->sock_mutex);
1649 return;
1650
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001651out_connect:
1652 mutex_unlock(&con->sock_mutex);
Bob Peterson01da24d2017-09-12 08:55:14 +00001653 queue_work(send_workqueue, &con->swork);
1654 cond_resched();
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001655}
1656
1657static void clean_one_writequeue(struct connection *con)
1658{
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001659 struct writequeue_entry *e, *safe;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001660
1661 spin_lock(&con->writequeue_lock);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001662 list_for_each_entry_safe(e, safe, &con->writequeue, list) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001663 free_entry(e);
1664 }
1665 spin_unlock(&con->writequeue_lock);
1666}
1667
1668/* Called from recovery when it knows that a node has
1669 left the cluster */
1670int dlm_lowcomms_close(int nodeid)
1671{
1672 struct connection *con;
David Teigland36b71a82012-07-26 12:44:30 -05001673 struct dlm_node_addr *na;
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001674 int idx;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001675
1676 log_print("closing connection to node %d", nodeid);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001677 idx = srcu_read_lock(&connections_srcu);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001678 con = nodeid2con(nodeid, 0);
1679 if (con) {
Lars Marowsky-Bree063c4c92009-08-11 16:18:23 -05001680 set_bit(CF_CLOSE, &con->flags);
Marcelo Ricardo Leitner0d737a82015-08-11 19:22:21 -03001681 close_connection(con, true, true, true);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001682 clean_one_writequeue(con);
Alexander Aring53a5eda2020-11-02 20:04:19 -05001683 if (con->othercon)
1684 clean_one_writequeue(con->othercon);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001685 }
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001686 srcu_read_unlock(&connections_srcu, idx);
David Teigland36b71a82012-07-26 12:44:30 -05001687
1688 spin_lock(&dlm_node_addrs_spin);
1689 na = find_node_addr(nodeid);
1690 if (na) {
1691 list_del(&na->list);
1692 while (na->addr_count--)
1693 kfree(na->addr[na->addr_count]);
1694 kfree(na);
1695 }
1696 spin_unlock(&dlm_node_addrs_spin);
1697
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001698 return 0;
1699}
1700
1701/* Receive workqueue function */
1702static void process_recv_sockets(struct work_struct *work)
1703{
1704 struct connection *con = container_of(work, struct connection, rwork);
1705 int err;
1706
1707 clear_bit(CF_READ_PENDING, &con->flags);
1708 do {
Alexander Aringd11ccd42020-11-02 20:04:25 -05001709 err = receive_from_sock(con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001710 } while (!err);
1711}
1712
Alexander Aringd11ccd42020-11-02 20:04:25 -05001713static void process_listen_recv_socket(struct work_struct *work)
1714{
1715 accept_from_sock(&listen_con);
1716}
1717
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001718/* Send workqueue function */
1719static void process_send_sockets(struct work_struct *work)
1720{
1721 struct connection *con = container_of(work, struct connection, swork);
1722
Alexander Aring7443bc92021-05-21 15:08:36 -04001723 WARN_ON(test_bit(CF_IS_OTHERCON, &con->flags));
1724
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001725 clear_bit(CF_WRITE_PENDING, &con->flags);
Alexander Aringba868d92021-05-21 15:08:37 -04001726
1727 if (test_and_clear_bit(CF_RECONNECT, &con->flags))
1728 close_connection(con, false, false, true);
1729
1730 if (con->sock == NULL) { /* not mutex protected so check it inside too */
1731 if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
1732 msleep(1000);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001733 con->connect_action(con);
Alexander Aringba868d92021-05-21 15:08:37 -04001734 }
Bob Peterson01da24d2017-09-12 08:55:14 +00001735 if (!list_empty(&con->writequeue))
Lars Marowsky-Bree063c4c92009-08-11 16:18:23 -05001736 send_to_sock(con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001737}
1738
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001739static void work_stop(void)
1740{
David Windsorb3555162019-04-02 08:37:10 -04001741 if (recv_workqueue)
1742 destroy_workqueue(recv_workqueue);
1743 if (send_workqueue)
1744 destroy_workqueue(send_workqueue);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001745}
1746
1747static int work_start(void)
1748{
David Teiglande43f0552011-03-10 13:22:34 -06001749 recv_workqueue = alloc_workqueue("dlm_recv",
1750 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001751 if (!recv_workqueue) {
1752 log_print("can't start dlm_recv");
1753 return -ENOMEM;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001754 }
1755
David Teiglande43f0552011-03-10 13:22:34 -06001756 send_workqueue = alloc_workqueue("dlm_send",
1757 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001758 if (!send_workqueue) {
1759 log_print("can't start dlm_send");
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001760 destroy_workqueue(recv_workqueue);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001761 return -ENOMEM;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001762 }
1763
1764 return 0;
1765}
1766
Alexander Aring9d232462021-03-01 17:05:20 -05001767static void shutdown_conn(struct connection *con)
1768{
1769 if (con->shutdown_action)
1770 con->shutdown_action(con);
1771}
1772
1773void dlm_lowcomms_shutdown(void)
1774{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001775 int idx;
1776
Alexander Aring9d232462021-03-01 17:05:20 -05001777 /* Set all the flags to prevent any
1778 * socket activity.
1779 */
1780 dlm_allow_conn = 0;
1781
1782 if (recv_workqueue)
1783 flush_workqueue(recv_workqueue);
1784 if (send_workqueue)
1785 flush_workqueue(send_workqueue);
1786
1787 dlm_close_sock(&listen_con.sock);
1788
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001789 idx = srcu_read_lock(&connections_srcu);
Alexander Aring9d232462021-03-01 17:05:20 -05001790 foreach_conn(shutdown_conn);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001791 srcu_read_unlock(&connections_srcu, idx);
Alexander Aring9d232462021-03-01 17:05:20 -05001792}
1793
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001794static void _stop_conn(struct connection *con, bool and_other)
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001795{
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001796 mutex_lock(&con->sock_mutex);
tsutomu.owa@toshiba.co.jp173a31f2017-09-12 09:01:24 +00001797 set_bit(CF_CLOSE, &con->flags);
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001798 set_bit(CF_READ_PENDING, &con->flags);
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001799 set_bit(CF_WRITE_PENDING, &con->flags);
tsutomu.owa@toshiba.co.jp93eaade2017-09-12 09:01:55 +00001800 if (con->sock && con->sock->sk) {
1801 write_lock_bh(&con->sock->sk->sk_callback_lock);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001802 con->sock->sk->sk_user_data = NULL;
tsutomu.owa@toshiba.co.jp93eaade2017-09-12 09:01:55 +00001803 write_unlock_bh(&con->sock->sk->sk_callback_lock);
1804 }
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001805 if (con->othercon && and_other)
1806 _stop_conn(con->othercon, false);
1807 mutex_unlock(&con->sock_mutex);
1808}
1809
1810static void stop_conn(struct connection *con)
1811{
1812 _stop_conn(con, true);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001813}
1814
Alexander Aring4798cbb2020-09-24 10:31:26 -04001815static void connection_release(struct rcu_head *rcu)
1816{
1817 struct connection *con = container_of(rcu, struct connection, rcu);
1818
1819 kfree(con->rx_buf);
1820 kfree(con);
1821}
1822
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001823static void free_conn(struct connection *con)
1824{
Marcelo Ricardo Leitner0d737a82015-08-11 19:22:21 -03001825 close_connection(con, true, true, true);
Alexander Aringa47666eb2020-08-27 15:02:49 -04001826 spin_lock(&connections_lock);
1827 hlist_del_rcu(&con->list);
1828 spin_unlock(&connections_lock);
Alexander Aring948c47e2020-08-27 15:02:53 -04001829 if (con->othercon) {
1830 clean_one_writequeue(con->othercon);
Alexander Aring5cbec202020-11-02 20:04:16 -05001831 call_srcu(&connections_srcu, &con->othercon->rcu,
1832 connection_release);
Alexander Aring948c47e2020-08-27 15:02:53 -04001833 }
Alexander Aring0de98432020-08-27 15:02:52 -04001834 clean_one_writequeue(con);
Alexander Aring5cbec202020-11-02 20:04:16 -05001835 call_srcu(&connections_srcu, &con->rcu, connection_release);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001836}
1837
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001838static void work_flush(void)
1839{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001840 int ok;
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001841 int i;
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001842 struct connection *con;
1843
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001844 do {
1845 ok = 1;
1846 foreach_conn(stop_conn);
David Windsorb3555162019-04-02 08:37:10 -04001847 if (recv_workqueue)
1848 flush_workqueue(recv_workqueue);
1849 if (send_workqueue)
1850 flush_workqueue(send_workqueue);
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001851 for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
Alexander Aringa47666eb2020-08-27 15:02:49 -04001852 hlist_for_each_entry_rcu(con, &connection_hash[i],
1853 list) {
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001854 ok &= test_bit(CF_READ_PENDING, &con->flags);
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001855 ok &= test_bit(CF_WRITE_PENDING, &con->flags);
1856 if (con->othercon) {
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001857 ok &= test_bit(CF_READ_PENDING,
1858 &con->othercon->flags);
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001859 ok &= test_bit(CF_WRITE_PENDING,
1860 &con->othercon->flags);
1861 }
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001862 }
1863 }
1864 } while (!ok);
1865}
1866
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001867void dlm_lowcomms_stop(void)
1868{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001869 int idx;
1870
1871 idx = srcu_read_lock(&connections_srcu);
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001872 work_flush();
Marcelo Ricardo Leitner3a8db792016-10-08 10:14:37 -03001873 foreach_conn(free_conn);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001874 srcu_read_unlock(&connections_srcu, idx);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001875 work_stop();
Alexander Aring043697f2020-08-27 15:02:50 -04001876 deinit_local();
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001877}
1878
1879int dlm_lowcomms_start(void)
1880{
1881 int error = -EINVAL;
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001882 int i;
1883
1884 for (i = 0; i < CONN_HASH_SIZE; i++)
1885 INIT_HLIST_HEAD(&connection_hash[i]);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001886
1887 init_local();
1888 if (!dlm_local_count) {
David Teigland617e82e2007-04-26 13:46:49 -05001889 error = -ENOTCONN;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001890 log_print("no local IP address has been set");
David Teigland513ef592012-03-30 11:46:08 -05001891 goto fail;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001892 }
1893
Alexander Aringd11ccd42020-11-02 20:04:25 -05001894 INIT_WORK(&listen_con.rwork, process_listen_recv_socket);
1895
David Teigland513ef592012-03-30 11:46:08 -05001896 error = work_start();
1897 if (error)
Alexander Aringa47666eb2020-08-27 15:02:49 -04001898 goto fail;
David Teigland513ef592012-03-30 11:46:08 -05001899
1900 dlm_allow_conn = 1;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001901
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001902 /* Start listening */
1903 if (dlm_config.ci_protocol == 0)
1904 error = tcp_listen_for_all();
1905 else
Alexander Aringd11ccd42020-11-02 20:04:25 -05001906 error = sctp_listen_for_all(&listen_con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001907 if (error)
1908 goto fail_unlisten;
1909
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001910 return 0;
1911
1912fail_unlisten:
David Teigland513ef592012-03-30 11:46:08 -05001913 dlm_allow_conn = 0;
Alexander Aringd11ccd42020-11-02 20:04:25 -05001914 dlm_close_sock(&listen_con.sock);
David Teigland513ef592012-03-30 11:46:08 -05001915fail:
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001916 return error;
1917}
David Teigland36b71a82012-07-26 12:44:30 -05001918
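/* Expected lifecycle, as we read it from the functions above: the DLM
 * core calls dlm_lowcomms_start() before the first lockspace comes up,
 * then dlm_lowcomms_shutdown() followed by dlm_lowcomms_stop() when
 * communications are torn down; dlm_lowcomms_exit() below frees the
 * cached node addresses on module exit.
 */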
1919void dlm_lowcomms_exit(void)
1920{
1921 struct dlm_node_addr *na, *safe;
1922
1923 spin_lock(&dlm_node_addrs_spin);
1924 list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) {
1925 list_del(&na->list);
1926 while (na->addr_count--)
1927 kfree(na->addr[na->addr_count]);
1928 kfree(na);
1929 }
1930 spin_unlock(&dlm_node_addrs_spin);
1931}