// SPDX-License-Identifier: GPL-2.0
/* RTT/RTO calculation.
 *
 * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com)
 *
 * https://tools.ietf.org/html/rfc6298
 * https://tools.ietf.org/html/rfc1122#section-4.2.3.1
 * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
 */

#include <linux/net.h>
#include "ar-internal.h"

#define RXRPC_RTO_MAX	((unsigned)(120 * HZ))
#define RXRPC_TIMEOUT_INIT ((unsigned)(1 * HZ))	/* RFC 6298 2.1 initial RTO value */
#define rxrpc_jiffies32 ((u32)jiffies)		/* As tcp_jiffies32 */
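
/* For example, with HZ == 1000, RXRPC_RTO_MAX works out to 120000
 * jiffies (120 seconds) and RXRPC_TIMEOUT_INIT to 1000 jiffies, the
 * 1-second initial RTO that RFC 6298 section 2.1 prescribes before any
 * RTT sample has been taken.
 */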

static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
{
	return 200;	/* Fixed 200us floor on the RTT variance */
}

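/* Compute the RTO from the RFC 6298 formula RTO = SRTT + 4 * RTTVAR,
 * using the fixed-point scaling inherited from TCP: peer->srtt_us
 * stores 8 times the smoothed RTT and peer->rttvar_us stores 4 times
 * the deviation estimate, so (srtt_us >> 3) + rttvar_us is the unscaled
 * sum in microseconds.  For example, srtt_us == 40000 (a 5ms SRTT) and
 * rttvar_us == 4000 (a 1ms RTTVAR) give 5000 + 4000 = 9000us, which
 * usecs_to_jiffies() converts to 9 jiffies at HZ == 1000.
 */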
static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
{
	return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
}

static u32 rxrpc_bound_rto(u32 rto)
{
	return min(rto, RXRPC_RTO_MAX);
}

/*
 * Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 * NOTE: the next three routines used to be one big routine.
 * To save cycles in the RFC 1323 implementation it was better to break
 * it up into three procedures. -- erics
 */
static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
{
	long m = sample_rtt_us; /* RTT */
	u32 srtt = peer->srtt_us;

	/* The following amusing code comes from Jacobson's
	 * article in SIGCOMM '88.  Note that rtt and mdev
	 * are scaled versions of rtt and mean deviation.
	 * This is designed to be as fast as possible.
	 * m stands for "measurement".
	 *
	 * In a 1990 paper the rto value was changed to:
	 *	RTO = rtt + 4 * mdev
	 *
	 * Funny. This algorithm seems to be very broken.
	 * These formulae increase RTO when it should be decreased, increase
	 * it too slowly when it should be increased quickly, decrease it
	 * too quickly, etc.  I guess in BSD RTO takes ONE value, so it
	 * absolutely does not matter how it is _calculated_.  It seems to
	 * be a trap that VJ failed to avoid. 8)
	 */
	if (srtt != 0) {
		m -= (srtt >> 3);	/* m is now error in rtt est */
		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
			m -= (peer->mdev_us >> 2);	/* similar update on mdev */
			/* This is similar to one of the Eifel findings.
			 * Eifel blocks mdev updates when rtt decreases.
			 * This solution is a bit different: we use a finer
			 * gain for mdev in this case (alpha*beta).
			 * Like Eifel it also prevents growth of the rto,
			 * but it also limits the overly fast rto decreases
			 * that happen in pure Eifel.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (peer->mdev_us >> 2);	/* similar update on mdev */
		}

		peer->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
		if (peer->mdev_us > peer->mdev_max_us) {
			peer->mdev_max_us = peer->mdev_us;
			if (peer->mdev_max_us > peer->rttvar_us)
				peer->rttvar_us = peer->mdev_max_us;
		}
	} else {
		/* no previous measure. */
		srtt = m << 3;		/* take the measured time to be rtt */
		peer->mdev_us = m << 1;	/* make sure rto = 3*rtt */
		peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer));
		peer->mdev_max_us = peer->rttvar_us;
	}

	peer->srtt_us = max(1U, srtt);
}
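
/* A worked example of the update above: suppose srtt_us == 40000 (a 5ms
 * SRTT, stored <<3) and mdev_us == 4000 when a 9ms sample (m == 9000)
 * arrives.  m -= srtt >> 3 leaves an error of 4000; srtt becomes 44000,
 * i.e. 7/8 * 5ms + 1/8 * 9ms = 5.5ms.  Then m -= mdev_us >> 2 leaves
 * 3000, so mdev_us becomes 7000: the unscaled deviation moves from
 * 1000us to 3/4 * 1000 + 1/4 * 4000 = 1750us, and rttvar_us is raised
 * if the new value exceeds the running maximum.
 */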

/*
 * Calculate the rto without backoff.  This is the second half of Van
 * Jacobson's routine referred to above.
 */
static void rxrpc_set_rto(struct rxrpc_peer *peer)
{
	u32 rto;

	/* 1. If the rtt variance happened to be less than 50 ms, it is a
	 *    hallucination.  It cannot be less, due to the utterly erratic
	 *    ACK generation made at least by Solaris and FreeBSD.  "Erratic
	 *    ACKs" have _nothing_ to do with delayed ACKs, because at
	 *    cwnd > 2 the true delack timeout is invisible.  Actually,
	 *    Linux 2.4 also generates erratic ACKs in some circumstances.
	 */
	rto = __rxrpc_set_rto(peer);

	/* 2. Fixups made earlier cannot be right: if we do not estimate the
	 *    RTO correctly without them, the whole algorithm is broken and
	 *    should be replaced with a correct one, which is exactly what we
	 *    pretend to do here.
	 */

	/* NOTE: clamping at RXRPC_RTO_MIN is not required, the current
	 * algorithm guarantees that the rto is higher.
	 */
	peer->rto_j = rxrpc_bound_rto(rto);
}

static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
{
	if (rtt_us < 0)
		return;

	//rxrpc_update_rtt_min(peer, rtt_us);
	rxrpc_rtt_estimator(peer, rtt_us);
	rxrpc_set_rto(peer);

	/* RFC6298: only reset backoff on valid RTT measurement. */
	peer->backoff = 0;
}

/*
 * Add RTT information to the cache.  This is called in softirq mode and has
 * exclusive access to the peer RTT data.
 */
void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
			int rtt_slot,
			rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
			ktime_t send_time, ktime_t resp_time)
{
	struct rxrpc_peer *peer = call->peer;
	s64 rtt_us;

	rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
	if (rtt_us < 0)
		return;

	spin_lock(&peer->rtt_input_lock);
	rxrpc_ack_update_rtt(peer, rtt_us);
	if (peer->rtt_count < 3)
		peer->rtt_count++;
	spin_unlock(&peer->rtt_input_lock);

	trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
			   peer->srtt_us >> 3, peer->rto_j);
}
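
/* Sketch of how a caller is expected to use this (the variable names
 * here are illustrative, not taken from any caller's code): record the
 * serial number and a ktime stamp when a packet goes out, then on
 * receipt of the matching response do something like:
 *
 *	rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response, slot,
 *			   sent_serial, ack_serial, sent_at, ktime_get_real());
 *
 * A negative interval (e.g. if the clock stepped between the two
 * stamps) is discarded by the rtt_us < 0 check above rather than being
 * fed to the estimator.
 */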

/*
 * Get the retransmission timeout to set in jiffies, backing it off each time
 * we retransmit.
 */
unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
{
	u64 timo_j;
	u8 backoff = READ_ONCE(peer->backoff);

	timo_j = peer->rto_j;
	timo_j <<= backoff;
	if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
		WRITE_ONCE(peer->backoff, backoff + 1);

	if (timo_j < 1)
		timo_j = 1;

	return timo_j;
}
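
/* For example, with rto_j == 250 (a 1s RTO at HZ == 250), successive
 * retransmission calls return timeouts of 1s, 2s and 4s as the backoff
 * shift climbs from 0 to 2; the shift stops increasing once doubling
 * the timeout again would exceed RXRPC_RTO_MAX (120 seconds).
 */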

void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
{
	peer->rto_j	= RXRPC_TIMEOUT_INIT;
	peer->mdev_us	= jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
	peer->backoff	= 0;
	//minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
}
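
/* Until the first sample arrives, rto_j stays at the RFC 6298 initial
 * value of one second; the first pass through rxrpc_rtt_estimator()
 * then seeds srtt_us from the measured time and overwrites mdev_us.
 */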