// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
**
**
*******************************************************************************
******************************************************************************/

/*
 * lowcomms.c
 *
 * This is the "low-level" comms layer.
 *
 * It is responsible for sending/receiving messages
 * from other nodes in the cluster.
 *
 * Cluster nodes are referred to by their nodeids. nodeids are
 * simply 32 bit numbers to the locking module - if they need to
 * be expanded for the cluster infrastructure then that is its
 * responsibility. It is this layer's
 * responsibility to resolve these into IP addresses or
 * whatever it needs for inter-node communication.
 *
 * The comms level is two kernel threads that deal mainly with
 * the receiving of messages from other nodes and passing them
 * up to the mid-level comms layer (which understands the
 * message format) for execution by the locking core, and
 * a send thread which does all the setting up of connections
 * to remote nodes and the sending of data. Threads are not allowed
 * to send their own data because it may cause them to wait in times
 * of high load. Also, this way, the sending thread can collect together
 * messages bound for one node and send them in one block.
 *
 * lowcomms will choose to use either TCP or SCTP as its transport layer
 * depending on the configuration variable 'protocol'. This should be set
 * to 0 (default) for TCP or 1 for SCTP. It should be configured using a
 * cluster-wide mechanism as it must be the same on all nodes of the cluster
 * for the DLM to function.
 *
 */

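/*
 * For illustration only: a hedged sketch of how 'protocol' is usually
 * set from userspace through dlm's configfs interface before lockspaces
 * are created (path and values assumed from dlm config conventions;
 * normally a cluster manager does this, not an administrator by hand):
 *
 *   echo 0 > /sys/kernel/config/dlm/cluster/protocol    # TCP (default)
 *   echo 1 > /sys/kernel/config/dlm/cluster/protocol    # SCTP
 */
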
#include <asm/ioctls.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/sctp.h>
#include <linux/slab.h>
#include <net/sctp/sctp.h>
#include <net/ipv6.h>

#include "dlm_internal.h"
#include "lowcomms.h"
#include "midcomms.h"
#include "config.h"

#define NEEDED_RMEM (4*1024*1024)

/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000)

struct connection {
	struct socket *sock;	/* NULL if not connected */
	uint32_t nodeid;	/* So we know who we are in the list */
	struct mutex sock_mutex;
	unsigned long flags;
#define CF_READ_PENDING 1
#define CF_WRITE_PENDING 2
#define CF_INIT_PENDING 4
#define CF_IS_OTHERCON 5
#define CF_CLOSE 6
#define CF_APP_LIMITED 7
#define CF_CLOSING 8
#define CF_SHUTDOWN 9
#define CF_CONNECTED 10
#define CF_RECONNECT 11
#define CF_DELAY_CONNECT 12
#define CF_EOF 13
	struct list_head writequeue;  /* List of outgoing writequeue_entries */
	spinlock_t writequeue_lock;
	atomic_t writequeue_cnt;
	int retries;
#define MAX_CONNECT_RETRIES 3
	struct hlist_node list;
	struct connection *othercon;
	struct connection *sendcon;
	struct work_struct rwork; /* Receive workqueue */
	struct work_struct swork; /* Send workqueue */
	wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
	unsigned char *rx_buf;
	int rx_buflen;
	int rx_leftover;
	struct rcu_head rcu;
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)

struct listen_connection {
	struct socket *sock;
	struct work_struct rwork;
};

#define DLM_WQ_REMAIN_BYTES(e) (PAGE_SIZE - e->end)
#define DLM_WQ_LENGTH_BYTES(e) (e->end - e->offset)

/* An entry waiting to be sent */
struct writequeue_entry {
	struct list_head list;
	struct page *page;
	int offset;
	int len;
	int end;
	int users;
	bool dirty;
	struct connection *con;
	struct list_head msgs;
	struct kref ref;
};

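/*
 * Illustrative layout of one writequeue page (offsets assumed from how
 * offset/end/users are managed in new_wq_entry() and
 * writequeue_entry_complete() below):
 *
 *   0         offset                end              PAGE_SIZE
 *   |  sent   |  queued, not sent   |  free space    |
 *
 * At commit time len becomes DLM_WQ_LENGTH_BYTES(e) (end - offset);
 * DLM_WQ_REMAIN_BYTES(e) is the free tail that new_wq_entry() can hand
 * out to further messages sharing the same page.
 */
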
struct dlm_msg {
	struct writequeue_entry *entry;
	struct dlm_msg *orig_msg;
	bool retransmit;
	void *ppc;
	int len;
	int idx; /* new()/commit() idx exchange */

	struct list_head list;
	struct kref ref;
};

struct dlm_node_addr {
	struct list_head list;
	int nodeid;
	int mark;
	int addr_count;
	int curr_addr_index;
	struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
};

struct dlm_proto_ops {
	bool try_new_addr;
	const char *name;
	int proto;

	int (*connect)(struct connection *con, struct socket *sock,
		       struct sockaddr *addr, int addr_len);
	void (*sockopts)(struct socket *sock);
	int (*bind)(struct socket *sock);
	int (*listen_validate)(void);
	void (*listen_sockopts)(struct socket *sock);
	int (*listen_bind)(struct socket *sock);
	/* what to do on shutdown */
	void (*shutdown_action)(struct connection *con);
	/* how to check for eof */
	bool (*eof_condition)(struct connection *con);
};

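/*
 * A transport plugs into lowcomms by filling in one of these ops
 * tables. Hedged sketch of what a TCP instance could look like (the
 * real initializers live further down in this file; only the callbacks
 * defined in this excerpt are shown, the rest are elided):
 *
 *   static const struct dlm_proto_ops dlm_tcp_ops = {
 *           .name            = "TCP",
 *           .proto           = IPPROTO_TCP,
 *           .shutdown_action = dlm_tcp_shutdown,
 *           .eof_condition   = tcp_eof_condition,
 *           ...
 *   };
 */
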
static struct listen_sock_callbacks {
	void (*sk_error_report)(struct sock *);
	void (*sk_data_ready)(struct sock *);
	void (*sk_state_change)(struct sock *);
	void (*sk_write_space)(struct sock *);
} listen_sock;

static LIST_HEAD(dlm_node_addrs);
static DEFINE_SPINLOCK(dlm_node_addrs_spin);

static struct listen_connection listen_con;
static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
static int dlm_local_count;
int dlm_allow_conn;

/* Work queues */
static struct workqueue_struct *recv_workqueue;
static struct workqueue_struct *send_workqueue;

static struct hlist_head connection_hash[CONN_HASH_SIZE];
static DEFINE_SPINLOCK(connections_lock);
DEFINE_STATIC_SRCU(connections_srcu);

static const struct dlm_proto_ops *dlm_proto_ops;

static void process_recv_sockets(struct work_struct *work);
static void process_send_sockets(struct work_struct *work);

/* must be called with writequeue_lock held */
static struct writequeue_entry *con_next_wq(struct connection *con)
{
	struct writequeue_entry *e;

	if (list_empty(&con->writequeue))
		return NULL;

	e = list_first_entry(&con->writequeue, struct writequeue_entry,
			     list);
	if (e->len == 0)
		return NULL;

	return e;
}

static struct connection *__find_con(int nodeid, int r)
{
	struct connection *con;

	hlist_for_each_entry_rcu(con, &connection_hash[r], list) {
		if (con->nodeid == nodeid)
			return con;
	}

	return NULL;
}

static bool tcp_eof_condition(struct connection *con)
{
	/* delay handling the EOF while messages are still queued to send */
	return atomic_read(&con->writequeue_cnt);
}

static int dlm_con_init(struct connection *con, int nodeid)
{
	con->rx_buflen = dlm_config.ci_buffer_size;
	con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS);
	if (!con->rx_buf)
		return -ENOMEM;

	con->nodeid = nodeid;
	mutex_init(&con->sock_mutex);
	INIT_LIST_HEAD(&con->writequeue);
	spin_lock_init(&con->writequeue_lock);
	atomic_set(&con->writequeue_cnt, 0);
	INIT_WORK(&con->swork, process_send_sockets);
	INIT_WORK(&con->rwork, process_recv_sockets);
	init_waitqueue_head(&con->shutdown_wait);

	return 0;
}

/*
 * If 'alloc' is zero then we don't attempt to create a new
 * connection structure for this node.
 */
static struct connection *nodeid2con(int nodeid, gfp_t alloc)
{
	struct connection *con, *tmp;
	int r, ret;

	r = nodeid_hash(nodeid);
	con = __find_con(nodeid, r);
	if (con || !alloc)
		return con;

	con = kzalloc(sizeof(*con), alloc);
	if (!con)
		return NULL;

	ret = dlm_con_init(con, nodeid);
	if (ret) {
		kfree(con);
		return NULL;
	}

	spin_lock(&connections_lock);
	/* Because multiple workqueues/threads call this function it can
	 * race on multiple cpus. Instead of locking the hot path
	 * __find_con() we just check again, in the rare case of recently
	 * added nodes, under protection of connections_lock. If this is
	 * the case we abort our connection creation and return the
	 * existing connection.
	 */
	tmp = __find_con(nodeid, r);
	if (tmp) {
		spin_unlock(&connections_lock);
		kfree(con->rx_buf);
		kfree(con);
		return tmp;
	}

	hlist_add_head_rcu(&con->list, &connection_hash[r]);
	spin_unlock(&connections_lock);

	return con;
}

/* Loop round all connections */
static void foreach_conn(void (*conn_func)(struct connection *c))
{
	int i;
	struct connection *con;

	for (i = 0; i < CONN_HASH_SIZE; i++) {
		hlist_for_each_entry_rcu(con, &connection_hash[i], list)
			conn_func(con);
	}
}

static struct dlm_node_addr *find_node_addr(int nodeid)
{
	struct dlm_node_addr *na;

	list_for_each_entry(na, &dlm_node_addrs, list) {
		if (na->nodeid == nodeid)
			return na;
	}
	return NULL;
}

static int addr_compare(const struct sockaddr_storage *x,
			const struct sockaddr_storage *y)
{
	switch (x->ss_family) {
	case AF_INET: {
		struct sockaddr_in *sinx = (struct sockaddr_in *)x;
		struct sockaddr_in *siny = (struct sockaddr_in *)y;
		if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
			return 0;
		if (sinx->sin_port != siny->sin_port)
			return 0;
		break;
	}
	case AF_INET6: {
		struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
		struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
		if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
			return 0;
		if (sinx->sin6_port != siny->sin6_port)
			return 0;
		break;
	}
	default:
		return 0;
	}
	return 1;
}

static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
			  struct sockaddr *sa_out, bool try_new_addr,
			  unsigned int *mark)
{
	struct sockaddr_storage sas;
	struct dlm_node_addr *na;

	if (!dlm_local_count)
		return -1;

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (na && na->addr_count) {
		memcpy(&sas, na->addr[na->curr_addr_index],
		       sizeof(struct sockaddr_storage));

		if (try_new_addr) {
			na->curr_addr_index++;
			if (na->curr_addr_index == na->addr_count)
				na->curr_addr_index = 0;
		}
	}
	spin_unlock(&dlm_node_addrs_spin);

	if (!na)
		return -EEXIST;

	if (!na->addr_count)
		return -ENOENT;

	*mark = na->mark;

	if (sas_out)
		memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));

	if (!sa_out)
		return 0;

	if (dlm_local_addr[0]->ss_family == AF_INET) {
		struct sockaddr_in *in4 = (struct sockaddr_in *) &sas;
		struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
	} else {
		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas;
		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out;
		ret6->sin6_addr = in6->sin6_addr;
	}

	return 0;
}

static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid,
			  unsigned int *mark)
{
	struct dlm_node_addr *na;
	int rv = -EEXIST;
	int addr_i;

	spin_lock(&dlm_node_addrs_spin);
	list_for_each_entry(na, &dlm_node_addrs, list) {
		if (!na->addr_count)
			continue;

		for (addr_i = 0; addr_i < na->addr_count; addr_i++) {
			if (addr_compare(na->addr[addr_i], addr)) {
				*nodeid = na->nodeid;
				*mark = na->mark;
				rv = 0;
				goto unlock;
			}
		}
	}
unlock:
	spin_unlock(&dlm_node_addrs_spin);
	return rv;
}

/* caller must hold the dlm_node_addrs_spin lock */
static bool dlm_lowcomms_na_has_addr(const struct dlm_node_addr *na,
				     const struct sockaddr_storage *addr)
{
	int i;

	for (i = 0; i < na->addr_count; i++) {
		if (addr_compare(na->addr[i], addr))
			return true;
	}

	return false;
}

int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
{
	struct sockaddr_storage *new_addr;
	struct dlm_node_addr *new_node, *na;
	bool ret;

	new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS);
	if (!new_node)
		return -ENOMEM;

	new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS);
	if (!new_addr) {
		kfree(new_node);
		return -ENOMEM;
	}

	memcpy(new_addr, addr, len);

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (!na) {
		new_node->nodeid = nodeid;
		new_node->addr[0] = new_addr;
		new_node->addr_count = 1;
		new_node->mark = dlm_config.ci_mark;
		list_add(&new_node->list, &dlm_node_addrs);
		spin_unlock(&dlm_node_addrs_spin);
		return 0;
	}

	ret = dlm_lowcomms_na_has_addr(na, addr);
	if (ret) {
		spin_unlock(&dlm_node_addrs_spin);
		kfree(new_addr);
		kfree(new_node);
		return -EEXIST;
	}

	if (na->addr_count >= DLM_MAX_ADDR_COUNT) {
		spin_unlock(&dlm_node_addrs_spin);
		kfree(new_addr);
		kfree(new_node);
		return -ENOSPC;
	}

	na->addr[na->addr_count++] = new_addr;
	spin_unlock(&dlm_node_addrs_spin);
	kfree(new_node);
	return 0;
}

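/*
 * Hedged usage sketch: the configfs layer hands over each address it
 * learns for a node with one call per address (variable names here are
 * illustrative, error handling trimmed):
 *
 *   struct sockaddr_storage ss;
 *
 *   // fill ss with the node's IPv4/IPv6 address, then:
 *   error = dlm_lowcomms_addr(nodeid, &ss, sizeof(ss));
 *   // -EEXIST: address already known for this node
 *   // -ENOSPC: DLM_MAX_ADDR_COUNT addresses already stored
 */
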
/* Data available on socket or listen socket received a connect */
static void lowcomms_data_ready(struct sock *sk)
{
	struct connection *con;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
		queue_work(recv_workqueue, &con->rwork);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void lowcomms_listen_data_ready(struct sock *sk)
{
	if (!dlm_allow_conn)
		return;

	queue_work(recv_workqueue, &listen_con.rwork);
}

static void lowcomms_write_space(struct sock *sk)
{
	struct connection *con;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (!con)
		goto out;

	if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
		log_print("successfully connected to node %d", con->nodeid);
		queue_work(send_workqueue, &con->swork);
		goto out;
	}

	clear_bit(SOCK_NOSPACE, &con->sock->flags);

	if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
		con->sock->sk->sk_write_pending--;
		clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags);
	}

	queue_work(send_workqueue, &con->swork);
out:
	read_unlock_bh(&sk->sk_callback_lock);
}

static inline void lowcomms_connect_sock(struct connection *con)
{
	if (test_bit(CF_CLOSE, &con->flags))
		return;
	queue_work(send_workqueue, &con->swork);
	cond_resched();
}

static void lowcomms_state_change(struct sock *sk)
{
	/* SCTP layer is not calling sk_data_ready when the connection
	 * is done, so we catch the signal through here. Also, it
	 * doesn't switch socket state when entering shutdown, so we
	 * skip the write in that case.
	 */
	if (sk->sk_shutdown) {
		if (sk->sk_shutdown == RCV_SHUTDOWN)
			lowcomms_data_ready(sk);
	} else if (sk->sk_state == TCP_ESTABLISHED) {
		lowcomms_write_space(sk);
	}
}

int dlm_lowcomms_connect_node(int nodeid)
{
	struct connection *con;
	int idx;

	if (nodeid == dlm_our_nodeid())
		return 0;

	idx = srcu_read_lock(&connections_srcu);
	con = nodeid2con(nodeid, GFP_NOFS);
	if (!con) {
		srcu_read_unlock(&connections_srcu, idx);
		return -ENOMEM;
	}

	lowcomms_connect_sock(con);
	srcu_read_unlock(&connections_srcu, idx);

	return 0;
}

int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
{
	struct dlm_node_addr *na;

	spin_lock(&dlm_node_addrs_spin);
	na = find_node_addr(nodeid);
	if (!na) {
		spin_unlock(&dlm_node_addrs_spin);
		return -ENOENT;
	}

	na->mark = mark;
	spin_unlock(&dlm_node_addrs_spin);

	return 0;
}

static void lowcomms_error_report(struct sock *sk)
{
	struct connection *con;
	struct sockaddr_storage saddr;
	void (*orig_report)(struct sock *) = NULL;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (con == NULL)
		goto out;

	orig_report = listen_sock.sk_error_report;
	if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) {
		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
				   "sending to node %d, port %d, "
				   "sk_err=%d/%d\n", dlm_our_nodeid(),
				   con->nodeid, dlm_config.ci_tcp_port,
				   sk->sk_err, sk->sk_err_soft);
	} else if (saddr.ss_family == AF_INET) {
		struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;

		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
				   "sending to node %d at %pI4, port %d, "
				   "sk_err=%d/%d\n", dlm_our_nodeid(),
				   con->nodeid, &sin4->sin_addr.s_addr,
				   dlm_config.ci_tcp_port, sk->sk_err,
				   sk->sk_err_soft);
	} else {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;

		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
				   "sending to node %d at %pI6c, "
				   "port %d, sk_err=%d/%d\n", dlm_our_nodeid(),
				   con->nodeid, &sin6->sin6_addr,
				   dlm_config.ci_tcp_port, sk->sk_err,
				   sk->sk_err_soft);
	}

	/* from here on we only deal with the sending connection */
	if (test_bit(CF_IS_OTHERCON, &con->flags))
		con = con->sendcon;

	switch (sk->sk_err) {
	case ECONNREFUSED:
		set_bit(CF_DELAY_CONNECT, &con->flags);
		break;
	default:
		break;
	}

	if (!test_and_set_bit(CF_RECONNECT, &con->flags))
		queue_work(send_workqueue, &con->swork);

out:
	read_unlock_bh(&sk->sk_callback_lock);
	if (orig_report)
		orig_report(sk);
}

/* Note: sk_callback_lock must be locked before calling this function. */
static void save_listen_callbacks(struct socket *sock)
{
	struct sock *sk = sock->sk;

	listen_sock.sk_data_ready = sk->sk_data_ready;
	listen_sock.sk_state_change = sk->sk_state_change;
	listen_sock.sk_write_space = sk->sk_write_space;
	listen_sock.sk_error_report = sk->sk_error_report;
}

static void restore_callbacks(struct socket *sock)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = NULL;
	sk->sk_data_ready = listen_sock.sk_data_ready;
	sk->sk_state_change = listen_sock.sk_state_change;
	sk->sk_write_space = listen_sock.sk_write_space;
	sk->sk_error_report = listen_sock.sk_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void add_listen_sock(struct socket *sock, struct listen_connection *con)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	save_listen_callbacks(sock);
	con->sock = sock;

	sk->sk_user_data = con;
	sk->sk_allocation = GFP_NOFS;
	/* Install a data_ready callback */
	sk->sk_data_ready = lowcomms_listen_data_ready;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Make a socket active */
static void add_sock(struct socket *sock, struct connection *con)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	con->sock = sock;

	sk->sk_user_data = con;
	/* Install a data_ready callback */
	sk->sk_data_ready = lowcomms_data_ready;
	sk->sk_write_space = lowcomms_write_space;
	sk->sk_state_change = lowcomms_state_change;
	sk->sk_allocation = GFP_NOFS;
	sk->sk_error_report = lowcomms_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Add the port number to an IPv4 or IPv6 sockaddr and return the address
   length */
static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
			  int *addr_len)
{
	saddr->ss_family = dlm_local_addr[0]->ss_family;
	if (saddr->ss_family == AF_INET) {
		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
		in4_addr->sin_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in);
		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
	} else {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
		in6_addr->sin6_port = cpu_to_be16(port);
		*addr_len = sizeof(struct sockaddr_in6);
	}
	memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len);
}

static void dlm_page_release(struct kref *kref)
{
	struct writequeue_entry *e = container_of(kref, struct writequeue_entry,
						  ref);

	__free_page(e->page);
	kfree(e);
}

static void dlm_msg_release(struct kref *kref)
{
	struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);

	kref_put(&msg->entry->ref, dlm_page_release);
	kfree(msg);
}

static void free_entry(struct writequeue_entry *e)
{
	struct dlm_msg *msg, *tmp;

	list_for_each_entry_safe(msg, tmp, &e->msgs, list) {
		if (msg->orig_msg) {
			msg->orig_msg->retransmit = false;
			kref_put(&msg->orig_msg->ref, dlm_msg_release);
		}

		list_del(&msg->list);
		kref_put(&msg->ref, dlm_msg_release);
	}

	list_del(&e->list);
	atomic_dec(&e->con->writequeue_cnt);
	kref_put(&e->ref, dlm_page_release);
}

static void dlm_close_sock(struct socket **sock)
{
	if (*sock) {
		restore_callbacks(*sock);
		sock_release(*sock);
		*sock = NULL;
	}
}

/* Close a remote connection and tidy up */
static void close_connection(struct connection *con, bool and_other,
			     bool tx, bool rx)
{
	bool closing = test_and_set_bit(CF_CLOSING, &con->flags);
	struct writequeue_entry *e;

	if (tx && !closing && cancel_work_sync(&con->swork)) {
		log_print("canceled swork for node %d", con->nodeid);
		clear_bit(CF_WRITE_PENDING, &con->flags);
	}
	if (rx && !closing && cancel_work_sync(&con->rwork)) {
		log_print("canceled rwork for node %d", con->nodeid);
		clear_bit(CF_READ_PENDING, &con->flags);
	}

	mutex_lock(&con->sock_mutex);
	dlm_close_sock(&con->sock);

	if (con->othercon && and_other) {
		/* Will only re-enter once. */
		close_connection(con->othercon, false, tx, rx);
	}

	/* if a writequeue entry was only sent half way, we drop the
	 * whole entry on reconnection, so that we never restart in the
	 * middle of a message, which would confuse the other end.
	 *
	 * we can always drop messages because of retransmits, but what
	 * we cannot allow is to transmit half messages which may be
	 * processed at the other side.
	 *
	 * our policy is to start from a clean state on disconnect; we
	 * don't know what was sent/received at the transport layer in
	 * this case.
	 */
	spin_lock(&con->writequeue_lock);
	if (!list_empty(&con->writequeue)) {
		e = list_first_entry(&con->writequeue, struct writequeue_entry,
				     list);
		if (e->dirty)
			free_entry(e);
	}
	spin_unlock(&con->writequeue_lock);

	con->rx_leftover = 0;
	con->retries = 0;
	clear_bit(CF_APP_LIMITED, &con->flags);
	clear_bit(CF_CONNECTED, &con->flags);
	clear_bit(CF_DELAY_CONNECT, &con->flags);
	clear_bit(CF_RECONNECT, &con->flags);
	clear_bit(CF_EOF, &con->flags);
	mutex_unlock(&con->sock_mutex);
	clear_bit(CF_CLOSING, &con->flags);
}

static void shutdown_connection(struct connection *con)
{
	int ret;

	flush_work(&con->swork);

	mutex_lock(&con->sock_mutex);
	/* nothing to shutdown */
	if (!con->sock) {
		mutex_unlock(&con->sock_mutex);
		return;
	}

	set_bit(CF_SHUTDOWN, &con->flags);
	ret = kernel_sock_shutdown(con->sock, SHUT_WR);
	mutex_unlock(&con->sock_mutex);
	if (ret) {
		log_print("Connection %p failed to shutdown: %d will force close",
			  con, ret);
		goto force_close;
	} else {
		ret = wait_event_timeout(con->shutdown_wait,
					 !test_bit(CF_SHUTDOWN, &con->flags),
					 DLM_SHUTDOWN_WAIT_TIMEOUT);
		if (ret == 0) {
			log_print("Connection %p shutdown timed out, will force close",
				  con);
			goto force_close;
		}
	}

	return;

force_close:
	clear_bit(CF_SHUTDOWN, &con->flags);
	close_connection(con, false, true, true);
}

static void dlm_tcp_shutdown(struct connection *con)
{
	if (con->othercon)
		shutdown_connection(con->othercon);
	shutdown_connection(con);
}

static int con_realloc_receive_buf(struct connection *con, int newlen)
{
	unsigned char *newbuf;

	newbuf = kmalloc(newlen, GFP_NOFS);
	if (!newbuf)
		return -ENOMEM;

	/* copy any leftover from last receive */
	if (con->rx_leftover)
		memmove(newbuf, con->rx_buf, con->rx_leftover);

	/* swap to new buffer space */
	kfree(con->rx_buf);
	con->rx_buflen = newlen;
	con->rx_buf = newbuf;

	return 0;
}

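/*
 * Illustrative rx_buf state between two receives (derived from how
 * receive_from_sock() below fills and drains the buffer):
 *
 *   0                      rx_leftover                   rx_buflen
 *   | partial message kept  |  space filled by the next  |
 *   | from the last receive |  kernel_recvmsg() call     |
 */
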
/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
	int call_again_soon = 0;
	struct msghdr msg;
	struct kvec iov;
	int ret, buflen;

	mutex_lock(&con->sock_mutex);

	if (con->sock == NULL) {
		ret = -EAGAIN;
		goto out_close;
	}

	/* realloc if we get new buffer size to read out */
	buflen = dlm_config.ci_buffer_size;
	if (con->rx_buflen != buflen && con->rx_leftover <= buflen) {
		ret = con_realloc_receive_buf(con, buflen);
		if (ret < 0)
			goto out_resched;
	}

	/* calculate new buffer parameters regarding last receive and
	 * possible leftover bytes
	 */
	iov.iov_base = con->rx_buf + con->rx_leftover;
	iov.iov_len = con->rx_buflen - con->rx_leftover;

	memset(&msg, 0, sizeof(msg));
	msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
	ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
			     msg.msg_flags);
	if (ret <= 0)
		goto out_close;
	else if (ret == iov.iov_len)
		call_again_soon = 1;

	/* new buflen according to bytes read and leftover from last receive */
	buflen = ret + con->rx_leftover;
	ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
	if (ret < 0)
		goto out_close;

	/* calculate leftover bytes from process and put them at the start
	 * of the receive buffer, so that the next receive sees the full
	 * message at the start address of the receive buffer.
	 */
	con->rx_leftover = buflen - ret;
	if (con->rx_leftover) {
		memmove(con->rx_buf, con->rx_buf + ret,
			con->rx_leftover);
		call_again_soon = true;
	}

	if (call_again_soon)
		goto out_resched;

	mutex_unlock(&con->sock_mutex);
	return 0;

out_resched:
	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
		queue_work(recv_workqueue, &con->rwork);
	mutex_unlock(&con->sock_mutex);
	return -EAGAIN;

out_close:
	if (ret == 0) {
		log_print("connection %p got EOF from %d",
			  con, con->nodeid);

		if (dlm_proto_ops->eof_condition &&
		    dlm_proto_ops->eof_condition(con)) {
			set_bit(CF_EOF, &con->flags);
			mutex_unlock(&con->sock_mutex);
		} else {
			mutex_unlock(&con->sock_mutex);
			close_connection(con, false, true, false);

			/* handling for tcp shutdown */
			clear_bit(CF_SHUTDOWN, &con->flags);
			wake_up(&con->shutdown_wait);
		}

		/* signal the receive worker to break out */
		ret = -1;
	} else {
		mutex_unlock(&con->sock_mutex);
	}
	return ret;
}

/* Listening socket is busy, accept a connection */
static int accept_from_sock(struct listen_connection *con)
{
	int result;
	struct sockaddr_storage peeraddr;
	struct socket *newsock;
	int len, idx;
	int nodeid;
	struct connection *newcon;
	struct connection *addcon;
	unsigned int mark;

	if (!con->sock)
		return -ENOTCONN;

	result = kernel_accept(con->sock, &newsock, O_NONBLOCK);
	if (result < 0)
		goto accept_err;

	/* Get the connected socket's peer */
	memset(&peeraddr, 0, sizeof(peeraddr));
	len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
	if (len < 0) {
		result = -ECONNABORTED;
		goto accept_err;
	}

	/* Get the new node's NODEID */
	make_sockaddr(&peeraddr, 0, &len);
	if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) {
		unsigned char *b = (unsigned char *)&peeraddr;

		log_print("connect from non cluster node");
		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
				     b, sizeof(struct sockaddr_storage));
		sock_release(newsock);
		return -1;
	}

	log_print("got connection from %d", nodeid);

	/*  Check to see if we already have a connection to this node. This
	 *  could happen if the two nodes initiate a connection at roughly
	 *  the same time and the connections cross on the wire.
	 *  In this case we store the incoming one in "othercon"
	 */
	idx = srcu_read_lock(&connections_srcu);
	newcon = nodeid2con(nodeid, GFP_NOFS);
	if (!newcon) {
		srcu_read_unlock(&connections_srcu, idx);
		result = -ENOMEM;
		goto accept_err;
	}

	sock_set_mark(newsock->sk, mark);

	mutex_lock(&newcon->sock_mutex);
	if (newcon->sock) {
		struct connection *othercon = newcon->othercon;

		if (!othercon) {
			othercon = kzalloc(sizeof(*othercon), GFP_NOFS);
			if (!othercon) {
				log_print("failed to allocate incoming socket");
				mutex_unlock(&newcon->sock_mutex);
				srcu_read_unlock(&connections_srcu, idx);
				result = -ENOMEM;
				goto accept_err;
			}

			result = dlm_con_init(othercon, nodeid);
			if (result < 0) {
				kfree(othercon);
				mutex_unlock(&newcon->sock_mutex);
				srcu_read_unlock(&connections_srcu, idx);
				goto accept_err;
			}

			lockdep_set_subclass(&othercon->sock_mutex, 1);
			set_bit(CF_IS_OTHERCON, &othercon->flags);
			newcon->othercon = othercon;
			othercon->sendcon = newcon;
		} else {
			/* close other sock con if we have something new */
			close_connection(othercon, false, true, false);
		}

		mutex_lock(&othercon->sock_mutex);
		add_sock(newsock, othercon);
		addcon = othercon;
		mutex_unlock(&othercon->sock_mutex);
	} else {
		/* accept copies the sk after we've saved the callbacks, so we
		   don't want to save them a second time or comm errors will
		   result in calling sk_error_report recursively. */
		add_sock(newsock, newcon);
		addcon = newcon;
	}

	set_bit(CF_CONNECTED, &addcon->flags);
	mutex_unlock(&newcon->sock_mutex);

	/*
	 * Add it to the active queue in case we got data
	 * between processing the accept and adding the socket
	 * to the read_sockets list
	 */
	if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
		queue_work(recv_workqueue, &addcon->rwork);

	srcu_read_unlock(&connections_srcu, idx);

	return 0;

accept_err:
	if (newsock)
		sock_release(newsock);

	if (result != -EAGAIN)
		log_print("error accepting connection from node: %d", result);
	return result;
}

/*
 * writequeue_entry_complete - try to delete and free write queue entry
 * @e: write queue entry to try to delete
 * @completed: bytes completed
 *
 * writequeue_lock must be held.
 */
static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
{
	e->offset += completed;
	e->len -= completed;
	/* signal that page was half way transmitted */
	e->dirty = true;

	if (e->len == 0 && e->users == 0)
		free_entry(e);
}

/*
 * sctp_bind_addrs - bind a SCTP socket to all our addresses
 */
static int sctp_bind_addrs(struct socket *sock, uint16_t port)
{
	struct sockaddr_storage localaddr;
	struct sockaddr *addr = (struct sockaddr *)&localaddr;
	int i, addr_len, result = 0;

	for (i = 0; i < dlm_local_count; i++) {
		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
		make_sockaddr(&localaddr, port, &addr_len);

		if (!i)
			result = kernel_bind(sock, addr, addr_len);
		else
			result = sock_bind_add(sock->sk, addr, addr_len);

		if (result < 0) {
			log_print("Can't bind to %d addr number %d, %d.\n",
				  port, i + 1, result);
			break;
		}
	}
	return result;
}

/* Get local addresses */
static void init_local(void)
{
	struct sockaddr_storage sas, *addr;
	int i;

	dlm_local_count = 0;
	for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
		if (dlm_our_addr(&sas, i))
			break;

		addr = kmemdup(&sas, sizeof(*addr), GFP_NOFS);
		if (!addr)
			break;
		dlm_local_addr[dlm_local_count++] = addr;
	}
}

static void deinit_local(void)
{
	int i;

	for (i = 0; i < dlm_local_count; i++)
		kfree(dlm_local_addr[i]);
}

static struct writequeue_entry *new_writequeue_entry(struct connection *con,
						     gfp_t allocation)
{
	struct writequeue_entry *entry;

	entry = kzalloc(sizeof(*entry), allocation);
	if (!entry)
		return NULL;

	entry->page = alloc_page(allocation | __GFP_ZERO);
	if (!entry->page) {
		kfree(entry);
		return NULL;
	}

	entry->con = con;
	entry->users = 1;
	kref_init(&entry->ref);
	INIT_LIST_HEAD(&entry->msgs);

	return entry;
}

static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
					     gfp_t allocation, char **ppc,
					     void (*cb)(struct dlm_mhandle *mh),
					     struct dlm_mhandle *mh)
{
	struct writequeue_entry *e;

	spin_lock(&con->writequeue_lock);
	if (!list_empty(&con->writequeue)) {
		e = list_last_entry(&con->writequeue, struct writequeue_entry, list);
		if (DLM_WQ_REMAIN_BYTES(e) >= len) {
			kref_get(&e->ref);

			*ppc = page_address(e->page) + e->end;
			if (cb)
				cb(mh);

			e->end += len;
			e->users++;
			spin_unlock(&con->writequeue_lock);

			return e;
		}
	}
	spin_unlock(&con->writequeue_lock);

	e = new_writequeue_entry(con, allocation);
	if (!e)
		return NULL;

	kref_get(&e->ref);
	*ppc = page_address(e->page);
	e->end += len;
	atomic_inc(&con->writequeue_cnt);

	spin_lock(&con->writequeue_lock);
	if (cb)
		cb(mh);

	list_add_tail(&e->list, &con->writequeue);
	spin_unlock(&con->writequeue_lock);

	return e;
}

static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
						gfp_t allocation, char **ppc,
						void (*cb)(struct dlm_mhandle *mh),
						struct dlm_mhandle *mh)
{
	struct writequeue_entry *e;
	struct dlm_msg *msg;

	msg = kzalloc(sizeof(*msg), allocation);
	if (!msg)
		return NULL;

	kref_init(&msg->ref);

	e = new_wq_entry(con, len, allocation, ppc, cb, mh);
	if (!e) {
		kfree(msg);
		return NULL;
	}

	msg->ppc = *ppc;
	msg->len = len;
	msg->entry = e;

	return msg;
}

struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
				     char **ppc, void (*cb)(struct dlm_mhandle *mh),
				     struct dlm_mhandle *mh)
{
	struct connection *con;
	struct dlm_msg *msg;
	int idx;

	if (len > DLM_MAX_SOCKET_BUFSIZE ||
	    len < sizeof(struct dlm_header)) {
		BUILD_BUG_ON(PAGE_SIZE < DLM_MAX_SOCKET_BUFSIZE);
		log_print("failed to allocate a buffer of size %d", len);
		WARN_ON(1);
		return NULL;
	}

	idx = srcu_read_lock(&connections_srcu);
	con = nodeid2con(nodeid, allocation);
	if (!con) {
		srcu_read_unlock(&connections_srcu, idx);
		return NULL;
	}

	msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, mh);
	if (!msg) {
		srcu_read_unlock(&connections_srcu, idx);
		return NULL;
	}

	/* we assume that on success the caller will call commit */
	msg->idx = idx;
	return msg;
}

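/*
 * Hedged usage sketch of the message API (midcomms is the real caller;
 * commit and put are defined below, error handling trimmed):
 *
 *   char *ppc;
 *   struct dlm_msg *msg;
 *
 *   msg = dlm_lowcomms_new_msg(nodeid, len, GFP_NOFS, &ppc, NULL, NULL);
 *   if (!msg)
 *           return -ENOMEM;
 *   memcpy(ppc, data, len);          // fill the reserved buffer space
 *   dlm_lowcomms_commit_msg(msg);    // hand it to the send worker
 *   dlm_lowcomms_put_msg(msg);       // drop the caller's reference
 */
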
Alexander Aring2874d1a2021-05-21 15:08:43 -04001308static void _dlm_lowcomms_commit_msg(struct dlm_msg *msg)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001309{
Alexander Aring8f2dc782021-05-21 15:08:42 -04001310 struct writequeue_entry *e = msg->entry;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001311 struct connection *con = e->con;
1312 int users;
1313
1314 spin_lock(&con->writequeue_lock);
Alexander Aring8f2dc782021-05-21 15:08:42 -04001315 kref_get(&msg->ref);
1316 list_add(&msg->list, &e->msgs);
1317
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001318 users = --e->users;
1319 if (users)
1320 goto out;
Alexander Aringf0747ebf2021-03-01 17:05:16 -05001321
1322 e->len = DLM_WQ_LENGTH_BYTES(e);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001323 spin_unlock(&con->writequeue_lock);
1324
Bob Peterson01da24d2017-09-12 08:55:14 +00001325 queue_work(send_workqueue, &con->swork);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001326 return;
1327
1328out:
1329 spin_unlock(&con->writequeue_lock);
1330 return;
1331}
1332
Alexander Aring2874d1a2021-05-21 15:08:43 -04001333void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
1334{
1335 _dlm_lowcomms_commit_msg(msg);
1336 srcu_read_unlock(&connections_srcu, msg->idx);
1337}
1338
Alexander Aring8f2dc782021-05-21 15:08:42 -04001339void dlm_lowcomms_put_msg(struct dlm_msg *msg)
1340{
1341 kref_put(&msg->ref, dlm_msg_release);
1342}
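
/*
 * Editor's sketch, not part of lowcomms: the expected caller pattern for
 * the three functions above.  A successful dlm_lowcomms_new_msg() holds
 * the connections_srcu read lock, so the caller must always follow up with
 * dlm_lowcomms_commit_msg().  fill_example_payload() is hypothetical.
 */
#if 0
static int send_example(int nodeid, int len)
{
	struct dlm_msg *msg;
	char *ppc;

	msg = dlm_lowcomms_new_msg(nodeid, len, GFP_NOFS, &ppc, NULL, NULL);
	if (!msg)
		return -ENOMEM;

	fill_example_payload(ppc, len);	/* hypothetical: write len bytes */

	dlm_lowcomms_commit_msg(msg);	/* queue for sending, drops srcu */
	dlm_lowcomms_put_msg(msg);	/* drop our reference */
	return 0;
}
#endif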
1343
Alexander Aring2874d1a2021-05-21 15:08:43 -04001344/* does not take connections_srcu; only safe to use from workqueue context */
1345int dlm_lowcomms_resend_msg(struct dlm_msg *msg)
1346{
1347 struct dlm_msg *msg_resend;
1348 char *ppc;
1349
1350 if (msg->retransmit)
1351 return 1;
1352
1353 msg_resend = dlm_lowcomms_new_msg_con(msg->entry->con, msg->len,
1354 GFP_ATOMIC, &ppc, NULL, NULL);
1355 if (!msg_resend)
1356 return -ENOMEM;
1357
1358 msg->retransmit = true;
1359 kref_get(&msg->ref);
1360 msg_resend->orig_msg = msg;
1361
1362 memcpy(ppc, msg->ppc, msg->len);
1363 _dlm_lowcomms_commit_msg(msg_resend);
1364 dlm_lowcomms_put_msg(msg_resend);
1365
1366 return 0;
1367}
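
/*
 * Editor's note, not part of the original file: dlm_lowcomms_resend_msg()
 * does not retransmit in place.  It clones the payload into a fresh
 * writequeue entry, marks the original with ->retransmit so it is cloned
 * at most once, and links the original via ->orig_msg with an extra
 * reference, presumably dropped when the clone itself is released.
 */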
1368
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001369/* Send a message */
1370static void send_to_sock(struct connection *con)
1371{
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001372 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
1373 struct writequeue_entry *e;
Alexander Aring66d59552021-07-16 16:22:39 -04001374 int len, offset, ret;
Bob Petersonf92c8dd2010-11-12 11:15:20 -06001375 int count = 0;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001376
1377 mutex_lock(&con->sock_mutex);
1378 if (con->sock == NULL)
1379 goto out_connect;
1380
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001381 spin_lock(&con->writequeue_lock);
1382 for (;;) {
Alexander Aring66d59552021-07-16 16:22:39 -04001383 e = con_next_wq(con);
1384 if (!e)
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001385 break;
1386
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001388		len = e->len;
1389 offset = e->offset;
1390 BUG_ON(len == 0 && e->users == 0);
1391 spin_unlock(&con->writequeue_lock);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001392
Alexander Aring66d59552021-07-16 16:22:39 -04001393 ret = kernel_sendpage(con->sock, e->page, offset, len,
1394 msg_flags);
1395 if (ret == -EAGAIN || ret == 0) {
1396 if (ret == -EAGAIN &&
1397 test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
1398 !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
1399 /* Notify TCP that we're limited by the
1400 * application window size.
1401 */
1402 set_bit(SOCK_NOSPACE, &con->sock->flags);
1403 con->sock->sk->sk_write_pending++;
1404 }
1405 cond_resched();
1406 goto out;
1407 } else if (ret < 0)
1408 goto out;
Bob Petersonf92c8dd2010-11-12 11:15:20 -06001409
1410 /* Don't starve people filling buffers */
1411 if (++count >= MAX_SEND_MSG_COUNT) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001412 cond_resched();
Bob Petersonf92c8dd2010-11-12 11:15:20 -06001413 count = 0;
1414 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001415
1416 spin_lock(&con->writequeue_lock);
Mike Christie5d689872013-06-14 04:56:13 -05001417 writequeue_entry_complete(e, ret);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001418 }
1419 spin_unlock(&con->writequeue_lock);
Alexander Aring8aa31cb2021-05-21 15:08:39 -04001420
1421 /* close if we got EOF */
1422 if (test_and_clear_bit(CF_EOF, &con->flags)) {
1423 mutex_unlock(&con->sock_mutex);
1424 close_connection(con, false, false, true);
1425
1426 /* handling for tcp shutdown */
1427 clear_bit(CF_SHUTDOWN, &con->flags);
1428 wake_up(&con->shutdown_wait);
1429 } else {
1430 mutex_unlock(&con->sock_mutex);
1431 }
1432
1433 return;
1434
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001435out:
1436 mutex_unlock(&con->sock_mutex);
1437 return;
1438
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001439out_connect:
1440 mutex_unlock(&con->sock_mutex);
Bob Peterson01da24d2017-09-12 08:55:14 +00001441 queue_work(send_workqueue, &con->swork);
1442 cond_resched();
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001443}
1444
1445static void clean_one_writequeue(struct connection *con)
1446{
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001447 struct writequeue_entry *e, *safe;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001448
1449 spin_lock(&con->writequeue_lock);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001450 list_for_each_entry_safe(e, safe, &con->writequeue, list) {
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001451 free_entry(e);
1452 }
1453 spin_unlock(&con->writequeue_lock);
1454}
1455
1456/* Called from recovery when it knows that a node has
1457 left the cluster */
1458int dlm_lowcomms_close(int nodeid)
1459{
1460 struct connection *con;
David Teigland36b71a82012-07-26 12:44:30 -05001461 struct dlm_node_addr *na;
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001462 int idx;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001463
1464 log_print("closing connection to node %d", nodeid);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001465 idx = srcu_read_lock(&connections_srcu);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001466 con = nodeid2con(nodeid, 0);
1467 if (con) {
Lars Marowsky-Bree063c4c92009-08-11 16:18:23 -05001468 set_bit(CF_CLOSE, &con->flags);
Marcelo Ricardo Leitner0d737a82015-08-11 19:22:21 -03001469 close_connection(con, true, true, true);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001470 clean_one_writequeue(con);
Alexander Aring53a5eda2020-11-02 20:04:19 -05001471 if (con->othercon)
1472 clean_one_writequeue(con->othercon);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001473 }
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001474 srcu_read_unlock(&connections_srcu, idx);
David Teigland36b71a82012-07-26 12:44:30 -05001475
1476 spin_lock(&dlm_node_addrs_spin);
1477 na = find_node_addr(nodeid);
1478 if (na) {
1479 list_del(&na->list);
1480 while (na->addr_count--)
1481 kfree(na->addr[na->addr_count]);
1482 kfree(na);
1483 }
1484 spin_unlock(&dlm_node_addrs_spin);
1485
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001486 return 0;
1487}
1488
1489/* Receive workqueue function */
1490static void process_recv_sockets(struct work_struct *work)
1491{
1492 struct connection *con = container_of(work, struct connection, rwork);
1493 int err;
1494
1495 clear_bit(CF_READ_PENDING, &con->flags);
1496 do {
Alexander Aringd11ccd42020-11-02 20:04:25 -05001497 err = receive_from_sock(con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001498 } while (!err);
1499}
1500
Alexander Aringd11ccd42020-11-02 20:04:25 -05001501static void process_listen_recv_socket(struct work_struct *work)
1502{
1503 accept_from_sock(&listen_con);
1504}
1505
Alexander Aring8728a452021-07-16 16:22:43 -04001506static void dlm_connect(struct connection *con)
1507{
1508 struct sockaddr_storage addr;
1509 int result, addr_len;
1510 struct socket *sock;
1511 unsigned int mark;
1512
1513 /* Some odd races can cause double-connects, ignore them */
1514 if (con->retries++ > MAX_CONNECT_RETRIES)
1515 return;
1516
1517 if (con->sock) {
1518 log_print("node %d already connected.", con->nodeid);
1519 return;
1520 }
1521
1522 memset(&addr, 0, sizeof(addr));
1523 result = nodeid_to_addr(con->nodeid, &addr, NULL,
1524 dlm_proto_ops->try_new_addr, &mark);
1525 if (result < 0) {
1526 log_print("no address for nodeid %d", con->nodeid);
1527 return;
1528 }
1529
1530 /* Create a socket to communicate with */
1531 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
1532 SOCK_STREAM, dlm_proto_ops->proto, &sock);
1533 if (result < 0)
1534 goto socket_err;
1535
1536 sock_set_mark(sock->sk, mark);
1537 dlm_proto_ops->sockopts(sock);
1538
1539 add_sock(sock, con);
1540
1541 result = dlm_proto_ops->bind(sock);
1542 if (result < 0)
1543 goto add_sock_err;
1544
1545 log_print_ratelimited("connecting to %d", con->nodeid);
1546 make_sockaddr(&addr, dlm_config.ci_tcp_port, &addr_len);
1547 result = dlm_proto_ops->connect(con, sock, (struct sockaddr *)&addr,
1548 addr_len);
1549 if (result < 0)
1550 goto add_sock_err;
1551
1552 return;
1553
1554add_sock_err:
1555 dlm_close_sock(&con->sock);
1556
1557socket_err:
1558 /*
1559 * Some errors are fatal and this list might need adjusting. For other
1560 * errors we try again until the max number of retries is reached.
1561 */
1562 if (result != -EHOSTUNREACH &&
1563 result != -ENETUNREACH &&
1564 result != -ENETDOWN &&
1565 result != -EINVAL &&
1566 result != -EPROTONOSUPPORT) {
1567 log_print("connect %d try %d error %d", con->nodeid,
1568 con->retries, result);
1569 msleep(1000);
1570 lowcomms_connect_sock(con);
1571 }
1572}
1573
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001574/* Send workqueue function */
1575static void process_send_sockets(struct work_struct *work)
1576{
1577 struct connection *con = container_of(work, struct connection, swork);
1578
Alexander Aring7443bc92021-05-21 15:08:36 -04001579 WARN_ON(test_bit(CF_IS_OTHERCON, &con->flags));
1580
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001581 clear_bit(CF_WRITE_PENDING, &con->flags);
Alexander Aringba868d92021-05-21 15:08:37 -04001582
Alexander Aring489d8e52021-05-21 15:08:46 -04001583 if (test_and_clear_bit(CF_RECONNECT, &con->flags)) {
Alexander Aringba868d92021-05-21 15:08:37 -04001584 close_connection(con, false, false, true);
Alexander Aring489d8e52021-05-21 15:08:46 -04001585 dlm_midcomms_unack_msg_resend(con->nodeid);
1586 }
Alexander Aringba868d92021-05-21 15:08:37 -04001587
Alexander Aring8728a452021-07-16 16:22:43 -04001588 if (con->sock == NULL) {
Alexander Aringba868d92021-05-21 15:08:37 -04001589 if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
1590 msleep(1000);
Alexander Aring8728a452021-07-16 16:22:43 -04001591
1592 mutex_lock(&con->sock_mutex);
1593 dlm_connect(con);
1594 mutex_unlock(&con->sock_mutex);
Alexander Aringba868d92021-05-21 15:08:37 -04001595 }
Alexander Aring8728a452021-07-16 16:22:43 -04001596
Bob Peterson01da24d2017-09-12 08:55:14 +00001597 if (!list_empty(&con->writequeue))
Lars Marowsky-Bree063c4c92009-08-11 16:18:23 -05001598 send_to_sock(con);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001599}
1600
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001601static void work_stop(void)
1602{
Alexander Aringfcef0e62021-06-02 09:45:15 -04001603 if (recv_workqueue) {
David Windsorb3555162019-04-02 08:37:10 -04001604 destroy_workqueue(recv_workqueue);
Alexander Aringfcef0e62021-06-02 09:45:15 -04001605 recv_workqueue = NULL;
1606 }
1607
1608 if (send_workqueue) {
David Windsorb3555162019-04-02 08:37:10 -04001609 destroy_workqueue(send_workqueue);
Alexander Aringfcef0e62021-06-02 09:45:15 -04001610 send_workqueue = NULL;
1611 }
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001612}
1613
1614static int work_start(void)
1615{
Alexander Aring6c6a1cc2021-06-02 09:45:17 -04001616 recv_workqueue = alloc_ordered_workqueue("dlm_recv", WQ_MEM_RECLAIM);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001617 if (!recv_workqueue) {
1618 log_print("can't start dlm_recv");
1619 return -ENOMEM;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001620 }
1621
Alexander Aring6c6a1cc2021-06-02 09:45:17 -04001622 send_workqueue = alloc_ordered_workqueue("dlm_send", WQ_MEM_RECLAIM);
Namhyung Kimb9d41052010-12-13 13:42:24 -06001623 if (!send_workqueue) {
1624 log_print("can't start dlm_send");
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001625 destroy_workqueue(recv_workqueue);
Alexander Aringfcef0e62021-06-02 09:45:15 -04001626 recv_workqueue = NULL;
Namhyung Kimb9d41052010-12-13 13:42:24 -06001627 return -ENOMEM;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001628 }
1629
1630 return 0;
1631}
1632
Alexander Aring9d232462021-03-01 17:05:20 -05001633static void shutdown_conn(struct connection *con)
1634{
Alexander Aringa66c0082021-07-16 16:22:40 -04001635 if (dlm_proto_ops->shutdown_action)
1636 dlm_proto_ops->shutdown_action(con);
Alexander Aring9d232462021-03-01 17:05:20 -05001637}
1638
1639void dlm_lowcomms_shutdown(void)
1640{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001641 int idx;
1642
Alexander Aring9d232462021-03-01 17:05:20 -05001643 /* Set all the flags to prevent any
1644 * socket activity.
1645 */
1646 dlm_allow_conn = 0;
1647
1648 if (recv_workqueue)
1649 flush_workqueue(recv_workqueue);
1650 if (send_workqueue)
1651 flush_workqueue(send_workqueue);
1652
1653 dlm_close_sock(&listen_con.sock);
1654
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001655 idx = srcu_read_lock(&connections_srcu);
Alexander Aring9d232462021-03-01 17:05:20 -05001656 foreach_conn(shutdown_conn);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001657 srcu_read_unlock(&connections_srcu, idx);
Alexander Aring9d232462021-03-01 17:05:20 -05001658}
1659
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001660static void _stop_conn(struct connection *con, bool and_other)
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001661{
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001662 mutex_lock(&con->sock_mutex);
tsutomu.owa@toshiba.co.jp173a31f2017-09-12 09:01:24 +00001663 set_bit(CF_CLOSE, &con->flags);
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001664 set_bit(CF_READ_PENDING, &con->flags);
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001665 set_bit(CF_WRITE_PENDING, &con->flags);
tsutomu.owa@toshiba.co.jp93eaade2017-09-12 09:01:55 +00001666 if (con->sock && con->sock->sk) {
1667 write_lock_bh(&con->sock->sk->sk_callback_lock);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001668 con->sock->sk->sk_user_data = NULL;
tsutomu.owa@toshiba.co.jp93eaade2017-09-12 09:01:55 +00001669 write_unlock_bh(&con->sock->sk->sk_callback_lock);
1670 }
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001671 if (con->othercon && and_other)
1672 _stop_conn(con->othercon, false);
1673 mutex_unlock(&con->sock_mutex);
1674}
1675
1676static void stop_conn(struct connection *con)
1677{
1678 _stop_conn(con, true);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001679}
1680
Alexander Aring4798cbb2020-09-24 10:31:26 -04001681static void connection_release(struct rcu_head *rcu)
1682{
1683 struct connection *con = container_of(rcu, struct connection, rcu);
1684
1685 kfree(con->rx_buf);
1686 kfree(con);
1687}
1688
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001689static void free_conn(struct connection *con)
1690{
Marcelo Ricardo Leitner0d737a82015-08-11 19:22:21 -03001691 close_connection(con, true, true, true);
Alexander Aringa47666eb2020-08-27 15:02:49 -04001692 spin_lock(&connections_lock);
1693 hlist_del_rcu(&con->list);
1694 spin_unlock(&connections_lock);
Alexander Aring948c47e2020-08-27 15:02:53 -04001695 if (con->othercon) {
1696 clean_one_writequeue(con->othercon);
Alexander Aring5cbec202020-11-02 20:04:16 -05001697 call_srcu(&connections_srcu, &con->othercon->rcu,
1698 connection_release);
Alexander Aring948c47e2020-08-27 15:02:53 -04001699 }
Alexander Aring0de98432020-08-27 15:02:52 -04001700 clean_one_writequeue(con);
Alexander Aring5cbec202020-11-02 20:04:16 -05001701 call_srcu(&connections_srcu, &con->rcu, connection_release);
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001702}
1703
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001704static void work_flush(void)
1705{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001706 int ok;
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001707 int i;
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001708 struct connection *con;
1709
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001710 do {
1711 ok = 1;
1712 foreach_conn(stop_conn);
David Windsorb3555162019-04-02 08:37:10 -04001713 if (recv_workqueue)
1714 flush_workqueue(recv_workqueue);
1715 if (send_workqueue)
1716 flush_workqueue(send_workqueue);
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001717 for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
Alexander Aringa47666eb2020-08-27 15:02:49 -04001718 hlist_for_each_entry_rcu(con, &connection_hash[i],
1719 list) {
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001720 ok &= test_bit(CF_READ_PENDING, &con->flags);
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001721 ok &= test_bit(CF_WRITE_PENDING, &con->flags);
1722 if (con->othercon) {
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001723 ok &= test_bit(CF_READ_PENDING,
1724 &con->othercon->flags);
tsutomu.owa@toshiba.co.jp8a4abb02017-09-12 09:01:16 +00001725 ok &= test_bit(CF_WRITE_PENDING,
1726 &con->othercon->flags);
1727 }
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001728 }
1729 }
1730 } while (!ok);
1731}
1732
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001733void dlm_lowcomms_stop(void)
1734{
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001735 int idx;
1736
1737 idx = srcu_read_lock(&connections_srcu);
tsutomu.owa@toshiba.co.jpf0fb83c2017-09-12 08:55:40 +00001738 work_flush();
Marcelo Ricardo Leitner3a8db792016-10-08 10:14:37 -03001739 foreach_conn(free_conn);
Alexander Aringb38bc9c2021-05-21 15:08:35 -04001740 srcu_read_unlock(&connections_srcu, idx);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001741 work_stop();
Alexander Aring043697f2020-08-27 15:02:50 -04001742 deinit_local();
Alexander Aringa66c0082021-07-16 16:22:40 -04001743
1744 dlm_proto_ops = NULL;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001745}
1746
Alexander Aring2dc6b112021-07-16 16:22:41 -04001747static int dlm_listen_for_all(void)
1748{
1749 struct socket *sock;
1750 int result;
1751
1752 log_print("Using %s for communications",
1753 dlm_proto_ops->name);
1754
Alexander Aring90d21fc2021-07-16 16:22:42 -04001755 result = dlm_proto_ops->listen_validate();
1756 if (result < 0)
1757 return result;
Alexander Aring2dc6b112021-07-16 16:22:41 -04001758
1759 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
1760 SOCK_STREAM, dlm_proto_ops->proto, &sock);
1761 if (result < 0) {
1762 log_print("Can't create comms socket, check SCTP is loaded");
1763 goto out;
1764 }
1765
1766 sock_set_mark(sock->sk, dlm_config.ci_mark);
1767 dlm_proto_ops->listen_sockopts(sock);
1768
1769 result = dlm_proto_ops->listen_bind(sock);
1770 if (result < 0)
1771 goto out;
1772
1773 save_listen_callbacks(sock);
1774 add_listen_sock(sock, &listen_con);
1775
1776 INIT_WORK(&listen_con.rwork, process_listen_recv_socket);
1777 result = sock->ops->listen(sock, 5);
1778 if (result < 0) {
1779 dlm_close_sock(&listen_con.sock);
1780 goto out;
1781 }
1782
1783 return 0;
1784
1785out:
1786 sock_release(sock);
1787 return result;
1788}
1789
Alexander Aring8728a452021-07-16 16:22:43 -04001790static int dlm_tcp_bind(struct socket *sock)
1791{
1792 struct sockaddr_storage src_addr;
1793 int result, addr_len;
1794
1795	/* Bind to our cluster-known address when connecting, to avoid
1796	 * routing problems.
1797	 */
1798 memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
1799 make_sockaddr(&src_addr, 0, &addr_len);
1800
1801 result = sock->ops->bind(sock, (struct sockaddr *)&src_addr,
1802 addr_len);
1803 if (result < 0) {
1804 /* This *may* not indicate a critical error */
1805 log_print("could not bind for connect: %d", result);
1806 }
1807
1808 return 0;
1809}
1810
1811static int dlm_tcp_connect(struct connection *con, struct socket *sock,
1812 struct sockaddr *addr, int addr_len)
1813{
1814 int ret;
1815
1816 ret = sock->ops->connect(sock, addr, addr_len, O_NONBLOCK);
1817 switch (ret) {
1818 case -EINPROGRESS:
1819 fallthrough;
1820 case 0:
1821 return 0;
1822 }
1823
1824 return ret;
1825}
1826
Alexander Aring2dc6b112021-07-16 16:22:41 -04001827static int dlm_tcp_listen_validate(void)
1828{
1829 /* We don't support multi-homed hosts */
1830 if (dlm_local_count > 1) {
1831 log_print("TCP protocol can't handle multi-homed hosts, try SCTP");
1832 return -EINVAL;
1833 }
1834
1835 return 0;
1836}
1837
1838static void dlm_tcp_sockopts(struct socket *sock)
1839{
1840 /* Turn off Nagle's algorithm */
1841 tcp_sock_set_nodelay(sock->sk);
1842}
1843
1844static void dlm_tcp_listen_sockopts(struct socket *sock)
1845{
1846 dlm_tcp_sockopts(sock);
1847 sock_set_reuseaddr(sock->sk);
1848}
1849
1850static int dlm_tcp_listen_bind(struct socket *sock)
1851{
1852 int addr_len;
1853
1854 /* Bind to our port */
1855 make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
1856 return sock->ops->bind(sock, (struct sockaddr *)dlm_local_addr[0],
1857 addr_len);
1858}
1859
Alexander Aringa66c0082021-07-16 16:22:40 -04001860static const struct dlm_proto_ops dlm_tcp_ops = {
Alexander Aring2dc6b112021-07-16 16:22:41 -04001861 .name = "TCP",
1862 .proto = IPPROTO_TCP,
Alexander Aring8728a452021-07-16 16:22:43 -04001863 .connect = dlm_tcp_connect,
1864 .sockopts = dlm_tcp_sockopts,
1865 .bind = dlm_tcp_bind,
Alexander Aring2dc6b112021-07-16 16:22:41 -04001866 .listen_validate = dlm_tcp_listen_validate,
1867 .listen_sockopts = dlm_tcp_listen_sockopts,
1868 .listen_bind = dlm_tcp_listen_bind,
Alexander Aringa66c0082021-07-16 16:22:40 -04001869 .shutdown_action = dlm_tcp_shutdown,
1870 .eof_condition = tcp_eof_condition,
1871};
1872
Alexander Aring8728a452021-07-16 16:22:43 -04001873static int dlm_sctp_bind(struct socket *sock)
1874{
1875 return sctp_bind_addrs(sock, 0);
1876}
1877
1878static int dlm_sctp_connect(struct connection *con, struct socket *sock,
1879 struct sockaddr *addr, int addr_len)
1880{
1881 int ret;
1882
1883 /*
1884	 * Make sock->ops->connect() return within the specified time,
1885	 * since the O_NONBLOCK argument to connect() does not work here.
1886	 * Afterwards, restore this attribute to its default value.
1887 */
1888 sock_set_sndtimeo(sock->sk, 5);
1889 ret = sock->ops->connect(sock, addr, addr_len, 0);
1890 sock_set_sndtimeo(sock->sk, 0);
1891 if (ret < 0)
1892 return ret;
1893
1894 if (!test_and_set_bit(CF_CONNECTED, &con->flags))
1895 log_print("successful connected to node %d", con->nodeid);
1896
1897 return 0;
1898}
1899
Alexander Aring90d21fc2021-07-16 16:22:42 -04001900static int dlm_sctp_listen_validate(void)
1901{
1902 if (!IS_ENABLED(CONFIG_IP_SCTP)) {
1903 log_print("SCTP is not enabled by this kernel");
1904 return -EOPNOTSUPP;
1905 }
1906
1907 request_module("sctp");
1908 return 0;
1909}
1910
Alexander Aring2dc6b112021-07-16 16:22:41 -04001911static int dlm_sctp_bind_listen(struct socket *sock)
1912{
1913 return sctp_bind_addrs(sock, dlm_config.ci_tcp_port);
1914}
1915
1916static void dlm_sctp_sockopts(struct socket *sock)
1917{
1918 /* Turn off Nagle's algorithm */
1919 sctp_sock_set_nodelay(sock->sk);
1920 sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
1921}
1922
Alexander Aringa66c0082021-07-16 16:22:40 -04001923static const struct dlm_proto_ops dlm_sctp_ops = {
Alexander Aring2dc6b112021-07-16 16:22:41 -04001924 .name = "SCTP",
1925 .proto = IPPROTO_SCTP,
Alexander Aring8728a452021-07-16 16:22:43 -04001926 .try_new_addr = true,
1927 .connect = dlm_sctp_connect,
1928 .sockopts = dlm_sctp_sockopts,
1929 .bind = dlm_sctp_bind,
Alexander Aring90d21fc2021-07-16 16:22:42 -04001930 .listen_validate = dlm_sctp_listen_validate,
Alexander Aring2dc6b112021-07-16 16:22:41 -04001931 .listen_sockopts = dlm_sctp_sockopts,
1932 .listen_bind = dlm_sctp_bind_listen,
Alexander Aringa66c0082021-07-16 16:22:40 -04001933};
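
/*
 * Editor's sketch, not part of the original file: a third stream transport
 * would mainly mean filling in another dlm_proto_ops, mirroring the two
 * above, and wiring it into dlm_lowcomms_start().  All dlm_foo_* helpers
 * and the protocol number below are hypothetical.
 */
#if 0
static const struct dlm_proto_ops dlm_foo_ops = {
	.name = "FOO",
	.proto = 0,				/* hypothetical protocol */
	.connect = dlm_foo_connect,		/* like dlm_tcp_connect() */
	.sockopts = dlm_foo_sockopts,
	.bind = dlm_foo_bind,
	.listen_validate = dlm_foo_listen_validate,
	.listen_sockopts = dlm_foo_sockopts,
	.listen_bind = dlm_foo_listen_bind,
	/* .shutdown_action and .eof_condition are optional */
};
#endif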
1934
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001935int dlm_lowcomms_start(void)
1936{
1937 int error = -EINVAL;
Christine Caulfield5e9ccc32009-01-28 12:57:40 -06001938 int i;
1939
1940 for (i = 0; i < CONN_HASH_SIZE; i++)
1941 INIT_HLIST_HEAD(&connection_hash[i]);
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001942
1943 init_local();
1944 if (!dlm_local_count) {
David Teigland617e82e2007-04-26 13:46:49 -05001945 error = -ENOTCONN;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001946 log_print("no local IP address has been set");
David Teigland513ef592012-03-30 11:46:08 -05001947 goto fail;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001948 }
1949
Alexander Aringd11ccd42020-11-02 20:04:25 -05001950 INIT_WORK(&listen_con.rwork, process_listen_recv_socket);
1951
David Teigland513ef592012-03-30 11:46:08 -05001952 error = work_start();
1953 if (error)
Alexander Aringfcef0e62021-06-02 09:45:15 -04001954 goto fail_local;
David Teigland513ef592012-03-30 11:46:08 -05001955
1956 dlm_allow_conn = 1;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001957
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001958 /* Start listening */
Alexander Aringac7d5d02021-06-02 09:45:19 -04001959 switch (dlm_config.ci_protocol) {
1960 case DLM_PROTO_TCP:
Alexander Aringa66c0082021-07-16 16:22:40 -04001961 dlm_proto_ops = &dlm_tcp_ops;
Alexander Aringac7d5d02021-06-02 09:45:19 -04001962 break;
1963 case DLM_PROTO_SCTP:
Alexander Aringa66c0082021-07-16 16:22:40 -04001964 dlm_proto_ops = &dlm_sctp_ops;
Alexander Aringac7d5d02021-06-02 09:45:19 -04001965 break;
1966 default:
1967 log_print("Invalid protocol identifier %d set",
1968 dlm_config.ci_protocol);
1969 error = -EINVAL;
Alexander Aring2dc6b112021-07-16 16:22:41 -04001970 goto fail_proto_ops;
Alexander Aringac7d5d02021-06-02 09:45:19 -04001971 }
Alexander Aring2dc6b112021-07-16 16:22:41 -04001972
1973 error = dlm_listen_for_all();
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001974 if (error)
Alexander Aring2dc6b112021-07-16 16:22:41 -04001975 goto fail_listen;
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001976
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001977 return 0;
1978
Alexander Aring2dc6b112021-07-16 16:22:41 -04001979fail_listen:
1980 dlm_proto_ops = NULL;
1981fail_proto_ops:
David Teigland513ef592012-03-30 11:46:08 -05001982 dlm_allow_conn = 0;
Alexander Aringd11ccd42020-11-02 20:04:25 -05001983 dlm_close_sock(&listen_con.sock);
Alexander Aringfcef0e62021-06-02 09:45:15 -04001984 work_stop();
1985fail_local:
1986 deinit_local();
David Teigland513ef592012-03-30 11:46:08 -05001987fail:
Patrick Caulfield6ed7257b2007-04-17 15:39:57 +01001988 return error;
1989}
David Teigland36b71a82012-07-26 12:44:30 -05001990
1991void dlm_lowcomms_exit(void)
1992{
1993 struct dlm_node_addr *na, *safe;
1994
1995 spin_lock(&dlm_node_addrs_spin);
1996 list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) {
1997 list_del(&na->list);
1998 while (na->addr_count--)
1999 kfree(na->addr[na->addr_count]);
2000 kfree(na);
2001 }
2002 spin_unlock(&dlm_node_addrs_spin);
2003}