// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS fileserver probing
 *
 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
#include "protocol_yfs.h"
David Howellsf6cbb362020-04-24 15:10:00 +010014static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
15static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
David Howells3bf0fb62018-10-20 00:57:59 +010016
David Howellsf6cbb362020-04-24 15:10:00 +010017/*
18 * Start the probe polling timer. We have to supply it with an inc on the
19 * outstanding server count.
20 */
21static void afs_schedule_fs_probe(struct afs_net *net,
22 struct afs_server *server, bool fast)
23{
24 unsigned long atj;
25
26 if (!net->live)
27 return;
28
29 atj = server->probed_at;
30 atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
31
32 afs_inc_servers_outstanding(net);
33 if (timer_reduce(&net->fs_probe_timer, atj))
34 afs_dec_servers_outstanding(net);
35}
36
37/*
38 * Handle the completion of a set of probes.
39 */
40static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
41{
42 bool responded = server->probe.responded;
43
44 write_seqlock(&net->fs_lock);
David Howellsf3c130e2020-05-02 13:39:57 +010045 if (responded) {
David Howellsf6cbb362020-04-24 15:10:00 +010046 list_add_tail(&server->probe_link, &net->fs_probe_slow);
David Howellsf3c130e2020-05-02 13:39:57 +010047 } else {
48 server->rtt = UINT_MAX;
49 clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
David Howellsf6cbb362020-04-24 15:10:00 +010050 list_add_tail(&server->probe_link, &net->fs_probe_fast);
David Howellsf3c130e2020-05-02 13:39:57 +010051 }
David Howellsf6cbb362020-04-24 15:10:00 +010052 write_sequnlock(&net->fs_lock);
53
54 afs_schedule_fs_probe(net, server, !responded);
55}
56
57/*
58 * Handle the completion of a probe.
59 */
60static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
61{
62 _enter("");
63
64 if (atomic_dec_and_test(&server->probe_outstanding))
65 afs_finished_fs_probe(net, server);
66
67 wake_up_all(&server->probe_wq);
68}
69
70/*
71 * Handle inability to send a probe due to ENOMEM when trying to allocate a
72 * call struct.
73 */
74static void afs_fs_probe_not_done(struct afs_net *net,
75 struct afs_server *server,
76 struct afs_addr_cursor *ac)
77{
78 struct afs_addr_list *alist = ac->alist;
79 unsigned int index = ac->index;
80
81 _enter("");
82
83 trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
84 spin_lock(&server->probe_lock);
85
86 server->probe.local_failure = true;
87 if (server->probe.error == 0)
88 server->probe.error = -ENOMEM;
89
90 set_bit(index, &alist->failed);
91
92 spin_unlock(&server->probe_lock);
93 return afs_done_one_fs_probe(net, server);
David Howells3bf0fb62018-10-20 00:57:59 +010094}
95
96/*
97 * Process the result of probing a fileserver. This is called after successful
98 * or failed delivery of an FS.GetCapabilities operation.
99 */
100void afs_fileserver_probe_result(struct afs_call *call)
101{
102 struct afs_addr_list *alist = call->alist;
David Howellsffba7182019-05-09 22:22:50 +0100103 struct afs_server *server = call->server;
David Howells3bf0fb62018-10-20 00:57:59 +0100104 unsigned int index = call->addr_ix;
David Howells8a1d24e2020-05-22 23:58:28 +0100105 unsigned int rtt_us = 0;
David Howells3bf0fb62018-10-20 00:57:59 +0100106 int ret = call->error;
107
108 _enter("%pU,%u", &server->uuid, index);
109
110 spin_lock(&server->probe_lock);
111
112 switch (ret) {
113 case 0:
114 server->probe.error = 0;
115 goto responded;
116 case -ECONNABORTED:
117 if (!server->probe.responded) {
118 server->probe.abort_code = call->abort_code;
119 server->probe.error = ret;
120 }
121 goto responded;
122 case -ENOMEM:
123 case -ENONET:
David Howellsf6cbb362020-04-24 15:10:00 +0100124 clear_bit(index, &alist->responded);
David Howells3bf0fb62018-10-20 00:57:59 +0100125 server->probe.local_failure = true;
David Howellsf6cbb362020-04-24 15:10:00 +0100126 trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
David Howells3bf0fb62018-10-20 00:57:59 +0100127 goto out;
128 case -ECONNRESET: /* Responded, but call expired. */
David Howells4584ae92018-11-13 23:20:28 +0000129 case -ERFKILL:
130 case -EADDRNOTAVAIL:
David Howells3bf0fb62018-10-20 00:57:59 +0100131 case -ENETUNREACH:
132 case -EHOSTUNREACH:
David Howells4584ae92018-11-13 23:20:28 +0000133 case -EHOSTDOWN:
David Howells3bf0fb62018-10-20 00:57:59 +0100134 case -ECONNREFUSED:
135 case -ETIMEDOUT:
136 case -ETIME:
137 default:
138 clear_bit(index, &alist->responded);
139 set_bit(index, &alist->failed);
140 if (!server->probe.responded &&
141 (server->probe.error == 0 ||
142 server->probe.error == -ETIMEDOUT ||
143 server->probe.error == -ETIME))
144 server->probe.error = ret;
David Howellsf6cbb362020-04-24 15:10:00 +0100145 trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
David Howells3bf0fb62018-10-20 00:57:59 +0100146 goto out;
147 }
148
149responded:
David Howells3bf0fb62018-10-20 00:57:59 +0100150 clear_bit(index, &alist->failed);
151
152 if (call->service_id == YFS_FS_SERVICE) {
153 server->probe.is_yfs = true;
154 set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
155 alist->addrs[index].srx_service = call->service_id;
156 } else {
157 server->probe.not_yfs = true;
158 if (!server->probe.is_yfs) {
159 clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
160 alist->addrs[index].srx_service = call->service_id;
161 }
162 }
163
David Howells1d4adfa2020-08-20 15:13:00 +0100164 if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
165 rtt_us < server->probe.rtt) {
David Howellsc410bf012020-05-11 14:54:34 +0100166 server->probe.rtt = rtt_us;
David Howellsf3c130e2020-05-02 13:39:57 +0100167 server->rtt = rtt_us;
David Howells3bf0fb62018-10-20 00:57:59 +0100168 alist->preferred = index;
David Howells3bf0fb62018-10-20 00:57:59 +0100169 }
170
171 smp_wmb(); /* Set rtt before responded. */
172 server->probe.responded = true;
David Howellsf6cbb362020-04-24 15:10:00 +0100173 set_bit(index, &alist->responded);
David Howellsf3c130e2020-05-02 13:39:57 +0100174 set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
David Howells3bf0fb62018-10-20 00:57:59 +0100175out:
176 spin_unlock(&server->probe_lock);
177
David Howellsf6cbb362020-04-24 15:10:00 +0100178 _debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
179 &server->uuid, index, &alist->addrs[index].transport,
180 rtt_us, ret);
David Howells3bf0fb62018-10-20 00:57:59 +0100181
David Howellsf6cbb362020-04-24 15:10:00 +0100182 return afs_done_one_fs_probe(call->net, server);
David Howells3bf0fb62018-10-20 00:57:59 +0100183}
184
185/*
David Howellsf6cbb362020-04-24 15:10:00 +0100186 * Probe one or all of a fileserver's addresses to find out the best route and
187 * to query its capabilities.
David Howells3bf0fb62018-10-20 00:57:59 +0100188 */
David Howellsf6cbb362020-04-24 15:10:00 +0100189void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
190 struct key *key, bool all)
David Howells3bf0fb62018-10-20 00:57:59 +0100191{
192 struct afs_addr_cursor ac = {
193 .index = 0,
194 };
David Howells3bf0fb62018-10-20 00:57:59 +0100195
196 _enter("%pU", &server->uuid);
197
198 read_lock(&server->fs_lock);
199 ac.alist = rcu_dereference_protected(server->addresses,
200 lockdep_is_held(&server->fs_lock));
David Howells9efcc4a12020-03-26 15:24:07 +0000201 afs_get_addrlist(ac.alist);
David Howells3bf0fb62018-10-20 00:57:59 +0100202 read_unlock(&server->fs_lock);
203
David Howellsf6cbb362020-04-24 15:10:00 +0100204 server->probed_at = jiffies;
205 atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
David Howells3bf0fb62018-10-20 00:57:59 +0100206 memset(&server->probe, 0, sizeof(server->probe));
207 server->probe.rtt = UINT_MAX;
208
David Howellsf6cbb362020-04-24 15:10:00 +0100209 ac.index = ac.alist->preferred;
210 if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
211 all = true;
212
213 if (all) {
214 for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
215 if (!afs_fs_get_capabilities(net, server, &ac, key))
216 afs_fs_probe_not_done(net, server, &ac);
217 } else {
218 if (!afs_fs_get_capabilities(net, server, &ac, key))
219 afs_fs_probe_not_done(net, server, &ac);
David Howells3bf0fb62018-10-20 00:57:59 +0100220 }
221
David Howells9efcc4a12020-03-26 15:24:07 +0000222 afs_put_addrlist(ac.alist);
David Howells3bf0fb62018-10-20 00:57:59 +0100223}
224
225/*
226 * Wait for the first as-yet untried fileserver to respond.
227 */
228int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
229{
230 struct wait_queue_entry *waits;
231 struct afs_server *server;
David Howellsf3c130e2020-05-02 13:39:57 +0100232 unsigned int rtt = UINT_MAX, rtt_s;
David Howells3bf0fb62018-10-20 00:57:59 +0100233 bool have_responders = false;
234 int pref = -1, i;
235
236 _enter("%u,%lx", slist->nr_servers, untried);
237
238 /* Only wait for servers that have a probe outstanding. */
239 for (i = 0; i < slist->nr_servers; i++) {
240 if (test_bit(i, &untried)) {
241 server = slist->servers[i].server;
David Howellsf6cbb362020-04-24 15:10:00 +0100242 if (!atomic_read(&server->probe_outstanding))
David Howells3bf0fb62018-10-20 00:57:59 +0100243 __clear_bit(i, &untried);
244 if (server->probe.responded)
245 have_responders = true;
246 }
247 }
248 if (have_responders || !untried)
249 return 0;
250
251 waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
252 if (!waits)
253 return -ENOMEM;
254
255 for (i = 0; i < slist->nr_servers; i++) {
256 if (test_bit(i, &untried)) {
257 server = slist->servers[i].server;
258 init_waitqueue_entry(&waits[i], current);
259 add_wait_queue(&server->probe_wq, &waits[i]);
260 }
261 }
262
263 for (;;) {
264 bool still_probing = false;
265
266 set_current_state(TASK_INTERRUPTIBLE);
267 for (i = 0; i < slist->nr_servers; i++) {
268 if (test_bit(i, &untried)) {
269 server = slist->servers[i].server;
270 if (server->probe.responded)
271 goto stop;
David Howellsf6cbb362020-04-24 15:10:00 +0100272 if (atomic_read(&server->probe_outstanding))
David Howells3bf0fb62018-10-20 00:57:59 +0100273 still_probing = true;
274 }
275 }
276
Davidlohr Bueso08d405c2019-01-03 15:28:58 -0800277 if (!still_probing || signal_pending(current))
David Howells3bf0fb62018-10-20 00:57:59 +0100278 goto stop;
279 schedule();
280 }
281
282stop:
283 set_current_state(TASK_RUNNING);
284
285 for (i = 0; i < slist->nr_servers; i++) {
286 if (test_bit(i, &untried)) {
287 server = slist->servers[i].server;
David Howellsf3c130e2020-05-02 13:39:57 +0100288 rtt_s = READ_ONCE(server->rtt);
289 if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
290 rtt_s < rtt) {
David Howells3bf0fb62018-10-20 00:57:59 +0100291 pref = i;
David Howellsf3c130e2020-05-02 13:39:57 +0100292 rtt = rtt_s;
David Howells3bf0fb62018-10-20 00:57:59 +0100293 }
294
295 remove_wait_queue(&server->probe_wq, &waits[i]);
296 }
297 }
298
299 kfree(waits);
300
301 if (pref == -1 && signal_pending(current))
302 return -ERESTARTSYS;
303
304 if (pref >= 0)
305 slist->preferred = pref;
306 return 0;
307}
David Howellsf6cbb362020-04-24 15:10:00 +0100308
309/*
310 * Probe timer. We have an increment on fs_outstanding that we need to pass
311 * along to the work item.
312 */
313void afs_fs_probe_timer(struct timer_list *timer)
314{
315 struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
316
David Howells5481fc62020-06-19 23:39:36 +0100317 if (!net->live || !queue_work(afs_wq, &net->fs_prober))
David Howellsf6cbb362020-04-24 15:10:00 +0100318 afs_dec_servers_outstanding(net);
319}
320
321/*
322 * Dispatch a probe to a server.
323 */
324static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
325 __releases(&net->fs_lock)
326{
327 struct key *key = NULL;
328
329 /* We remove it from the queues here - it will be added back to
330 * one of the queues on the completion of the probe.
331 */
332 list_del_init(&server->probe_link);
333
334 afs_get_server(server, afs_server_trace_get_probe);
335 write_sequnlock(&net->fs_lock);
336
337 afs_fs_probe_fileserver(net, server, key, all);
338 afs_put_server(net, server, afs_server_trace_put_probe);
339}
340
341/*
David Howells8409f672020-04-22 00:02:46 +0100342 * Probe a server immediately without waiting for its due time to come
343 * round. This is used when all of the addresses have been tried.
344 */
345void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
346{
347 write_seqlock(&net->fs_lock);
348 if (!list_empty(&server->probe_link))
349 return afs_dispatch_fs_probe(net, server, true);
350 write_sequnlock(&net->fs_lock);
351}
352
353/*
David Howellsf6cbb362020-04-24 15:10:00 +0100354 * Probe dispatcher to regularly dispatch probes to keep NAT alive.
355 */
356void afs_fs_probe_dispatcher(struct work_struct *work)
357{
358 struct afs_net *net = container_of(work, struct afs_net, fs_prober);
359 struct afs_server *fast, *slow, *server;
360 unsigned long nowj, timer_at, poll_at;
361 bool first_pass = true, set_timer = false;
362
363 if (!net->live)
364 return;
365
366 _enter("");
367
368 if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
369 _leave(" [none]");
370 return;
371 }
372
373again:
374 write_seqlock(&net->fs_lock);
375
376 fast = slow = server = NULL;
377 nowj = jiffies;
378 timer_at = nowj + MAX_JIFFY_OFFSET;
379
380 if (!list_empty(&net->fs_probe_fast)) {
381 fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
382 poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
383 if (time_before(nowj, poll_at)) {
384 timer_at = poll_at;
385 set_timer = true;
386 fast = NULL;
387 }
388 }
389
390 if (!list_empty(&net->fs_probe_slow)) {
391 slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
392 poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
393 if (time_before(nowj, poll_at)) {
394 if (time_before(poll_at, timer_at))
395 timer_at = poll_at;
396 set_timer = true;
397 slow = NULL;
398 }
399 }
400
401 server = fast ?: slow;
402 if (server)
403 _debug("probe %pU", &server->uuid);
404
405 if (server && (first_pass || !need_resched())) {
406 afs_dispatch_fs_probe(net, server, server == fast);
407 first_pass = false;
408 goto again;
409 }
410
411 write_sequnlock(&net->fs_lock);
412
413 if (server) {
414 if (!queue_work(afs_wq, &net->fs_prober))
415 afs_dec_servers_outstanding(net);
416 _leave(" [requeue]");
417 } else if (set_timer) {
418 if (timer_reduce(&net->fs_probe_timer, timer_at))
419 afs_dec_servers_outstanding(net);
420 _leave(" [timer]");
421 } else {
422 afs_dec_servers_outstanding(net);
423 _leave(" [quiesce]");
424 }
425}
David Howells8409f672020-04-22 00:02:46 +0100426
427/*
428 * Wait for a probe on a particular fileserver to complete for 2s.
429 */
430int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
431{
432 struct wait_queue_entry wait;
433 unsigned long timo = 2 * HZ;
434
435 if (atomic_read(&server->probe_outstanding) == 0)
436 goto dont_wait;
437
438 init_wait_entry(&wait, 0);
439 for (;;) {
440 prepare_to_wait_event(&server->probe_wq, &wait,
441 is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
442 if (timo == 0 ||
443 server->probe.responded ||
444 atomic_read(&server->probe_outstanding) == 0 ||
445 (is_intr && signal_pending(current)))
446 break;
447 timo = schedule_timeout(timo);
448 }
449
450 finish_wait(&server->probe_wq, &wait);
451
452dont_wait:
453 if (server->probe.responded)
454 return 0;
455 if (is_intr && signal_pending(current))
456 return -ERESTARTSYS;
457 if (timo == 0)
458 return -ETIME;
459 return -EDESTADDRREQ;
460}
David Howells5481fc62020-06-19 23:39:36 +0100461
462/*
463 * Clean up the probing when the namespace is killed off.
464 */
465void afs_fs_probe_cleanup(struct afs_net *net)
466{
467 if (del_timer_sync(&net->fs_probe_timer))
468 afs_dec_servers_outstanding(net);
469}