#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <net/inetpeer.h>
#include <net/tcp.h>

int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE;

struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;

static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);

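/* Lazily generate the global Fast Open key the first time it is
 * needed. net_get_random_once() fills @key exactly once; when
 * @publish is true the fresh key is also installed as the cipher
 * context via tcp_fastopen_reset_cipher().
 */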
void tcp_fastopen_init_key_once(bool publish)
{
	static u8 key[TCP_FASTOPEN_KEY_LENGTH];

	/* tcp_fastopen_reset_cipher publishes the new context
	 * atomically, so we allow this race to happen here.
	 *
	 * All call sites of tcp_fastopen_cookie_gen also check
	 * for a valid cookie, so this is an acceptable risk.
	 */
	if (net_get_random_once(key, sizeof(key)) && publish)
		tcp_fastopen_reset_cipher(key, sizeof(key));
}

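/* RCU callback: release the crypto transform and the context itself
 * once no reader can still hold a reference to the old context.
 */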
static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
	struct tcp_fastopen_context *ctx =
		container_of(head, struct tcp_fastopen_context, rcu);
	crypto_free_cipher(ctx->tfm);
	kfree(ctx);
}

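/* Install a new AES cipher context keyed with @key. The new context
 * is published with rcu_assign_pointer() so readers in
 * tcp_fastopen_cookie_gen() see either the old or the new context;
 * the old one is freed only after an RCU grace period.
 */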
int tcp_fastopen_reset_cipher(void *key, unsigned int len)
{
	int err;
	struct tcp_fastopen_context *ctx, *octx;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	ctx->tfm = crypto_alloc_cipher("aes", 0, 0);

	if (IS_ERR(ctx->tfm)) {
		err = PTR_ERR(ctx->tfm);
error:		kfree(ctx);
		pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
		return err;
	}
	err = crypto_cipher_setkey(ctx->tfm, key, len);
	if (err) {
		pr_err("TCP: TFO cipher key error: %d\n", err);
		crypto_free_cipher(ctx->tfm);
		goto error;
	}
	memcpy(ctx->key, key, len);

	spin_lock(&tcp_fastopen_ctx_lock);

	octx = rcu_dereference_protected(tcp_fastopen_ctx,
				lockdep_is_held(&tcp_fastopen_ctx_lock));
	rcu_assign_pointer(tcp_fastopen_ctx, ctx);
	spin_unlock(&tcp_fastopen_ctx_lock);

	if (octx)
		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
	return err;
}

/* Computes the fastopen cookie for the IP path.
 * The path is 128 bits long (padded with zeros for IPv4).
 *
 * The caller must check foc->len to determine if a valid cookie
 * has been generated successfully.
 */
void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
			     struct tcp_fastopen_cookie *foc)
{
	__be32 path[4] = { src, dst, 0, 0 };
	struct tcp_fastopen_context *ctx;

	tcp_fastopen_init_key_once(true);

	rcu_read_lock();
	ctx = rcu_dereference(tcp_fastopen_ctx);
	if (ctx) {
		crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path);
		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
	}
	rcu_read_unlock();
}

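/* Create the child socket for an accepted Fast Open request: send the
 * SYN-ACK, account the request in the listener's fastopen queue,
 * initialize the child from the bits carried in the SYN, queue any SYN
 * payload, and add the child directly to the accept queue.
 */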
int tcp_fastopen_create_child(struct sock *sk,
			      struct sk_buff *skb,
			      struct sk_buff *skb_synack,
			      struct request_sock *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct sock *child;
	int err;

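	/* Start from a clean request: no SYN-ACK retransmits or timeouts
	 * yet, and no child socket attached.
	 */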
	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;

	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
	if (child == NULL) {
		NET_INC_STATS_BH(sock_net(sk),
				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		kfree_skb(skb_synack);
		return -1;
	}
	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
				    ireq->ir_rmt_addr, ireq->opt);
	err = net_xmit_eval(err);
	if (!err)
		tcp_rsk(req)->snt_synack = tcp_time_stamp;
	/* XXX (TFO) - is it ok to ignore error and continue? */

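	/* Account for this request in the listener's fastopen queue;
	 * qlen is checked against max_qlen in tcp_fastopen_queue_check().
	 */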
	spin_lock(&queue->fastopenq->lock);
	queue->fastopenq->qlen++;
	spin_unlock(&queue->fastopenq->lock);

	/* Initialize the child socket. We have to fix some values to
	 * account for the child being a Fast Open socket, created only
	 * out of the bits carried in the SYN packet.
	 */
	tp = tcp_sk(child);

	tp->fastopen_rsk = req;
	/* Do a hold on the listener sk so that if the listener is being
	 * closed, the child that has been accepted can live on and still
	 * access listen_lock.
	 */
	sock_hold(sk);
	tcp_rsk(req)->listener = sk;

	/* RFC1323: The window in SYN & SYN/ACK segments is never
	 * scaled. So correct it appropriately.
	 */
	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);

	/* Activate the retrans timer so that SYNACK can be retransmitted.
	 * The request socket is not added to the SYN table of the parent
	 * because it's been added to the accept queue directly.
	 */
	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);

	/* Add the child socket directly into the accept queue */
	inet_csk_reqsk_queue_add(sk, req, child);

	/* Now finish processing the fastopen child socket. */
	inet_csk(child)->icsk_af_ops->rebuild_header(child);
	tcp_init_congestion_control(child);
	tcp_mtup_init(child);
	tcp_init_metrics(child);
	tcp_init_buffer_space(child);

	/* Queue the data carried in the SYN packet. We need to first
	 * bump skb's refcnt because the caller will attempt to free it.
	 *
	 * XXX (TFO) - we honor a zero-payload TFO request for now.
	 * (Any reason not to?)
	 */
	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
		/* Don't queue the skb if there is no payload in SYN.
		 * XXX (TFO) - How about SYN+FIN?
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
	} else {
		skb = skb_get(skb);
		skb_dst_drop(skb);
		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
		skb_set_owner_r(skb, child);
		__skb_queue_tail(&child->sk_receive_queue, skb);
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		tp->syn_data_acked = 1;
	}
	sk->sk_data_ready(sk);
	bh_unlock_sock(child);
	sock_put(child);
	WARN_ON(req->sk == NULL);
	return 0;
}
EXPORT_SYMBOL(tcp_fastopen_create_child);

static bool tcp_fastopen_queue_check(struct sock *sk)
{
	struct fastopen_queue *fastopenq;

	/* Make sure the listener has enabled fastopen, and we don't
	 * exceed the max # of pending TFO requests allowed before trying
	 * to validate the cookie, in order to avoid burning CPU cycles
	 * unnecessarily.
	 *
	 * XXX (TFO) - The implication of checking the max_qlen before
	 * processing a cookie request is that clients can't differentiate
	 * between qlen overflow causing Fast Open to be disabled
	 * temporarily vs a server not supporting Fast Open at all.
	 */
	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
	if (fastopenq == NULL || fastopenq->max_qlen == 0)
		return false;

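	/* If the queue is full, try to reclaim one slot from a request
	 * that the peer has already reset (rskq_rst_head) and whose
	 * expiry has passed; otherwise count an overflow and refuse
	 * Fast Open for this SYN.
	 */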
	if (fastopenq->qlen >= fastopenq->max_qlen) {
		struct request_sock *req1;

		spin_lock(&fastopenq->lock);
		req1 = fastopenq->rskq_rst_head;
		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
			spin_unlock(&fastopenq->lock);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
			return false;
		}
		fastopenq->rskq_rst_head = req1->dl_next;
		fastopenq->qlen--;
		spin_unlock(&fastopenq->lock);
		reqsk_free(req1);
	}
	return true;
}

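/* Decide whether this SYN may be fast-opened. Returns true if the SYN
 * payload may be accepted immediately, either because the listener does
 * not require a cookie or because the client presented a valid one;
 * *valid_foc is filled with a cookie to echo back whenever the client
 * needs a (new) one.
 */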
bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
			struct request_sock *req,
			struct tcp_fastopen_cookie *foc,
			struct tcp_fastopen_cookie *valid_foc)
{
	bool skip_cookie = false;

	if (likely(!fastopen_cookie_present(foc))) {
		/* See include/net/tcp.h for the meaning of these knobs */
		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
		     (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
			skip_cookie = true; /* no cookie to validate */
		else
			return false;
	}
	/* A FO option is present; bump the counter. */
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);

	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
	    !tcp_fastopen_queue_check(sk))
		return false;

	if (skip_cookie) {
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
	}

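	/* A cookie is present. A full-sized cookie is validated against
	 * one freshly generated for this source/destination pair; a
	 * zero-length cookie is a request for one; any other size is
	 * treated as invalid and answered with a valid cookie.
	 */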
	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
						ip_hdr(skb)->daddr, valid_foc);
			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
			    memcmp(&foc->val[0], &valid_foc->val[0],
				   TCP_FASTOPEN_COOKIE_SIZE) != 0)
				return false;
			valid_foc->len = -1;
		}
		/* Acknowledge the data received from the peer. */
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
	} else if (foc->len == 0) { /* Client requesting a cookie */
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
					ip_hdr(skb)->daddr, valid_foc);
		NET_INC_STATS_BH(sock_net(sk),
				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
	} else {
		/* Client sent a cookie with wrong size. Treat it
		 * the same as invalid and return a valid one.
		 */
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
					ip_hdr(skb)->daddr, valid_foc);
	}
	return false;
}
EXPORT_SYMBOL(tcp_fastopen_check);