blob: e53dafccf21fde81916b360ec4d0bb590919dca6 [file] [log] [blame]
/*
 * Copyright (c) 2015 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
13
14#include <linux/module.h>
15#include <linux/openvswitch.h>
16#include <net/ip.h>
17#include <net/netfilter/nf_conntrack_core.h>
18#include <net/netfilter/nf_conntrack_zones.h>
19#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
20
21#include "datapath.h"
22#include "conntrack.h"
23#include "flow.h"
24#include "flow_netlink.h"
25
26struct ovs_ct_len_tbl {
27 size_t maxlen;
28 size_t minlen;
29};
30
Joe Stringer182e3042015-08-26 11:31:49 -070031/* Metadata mark for masked write to conntrack mark */
32struct md_mark {
33 u32 value;
34 u32 mask;
35};
36
Joe Stringer7f8a4362015-08-26 11:31:48 -070037/* Conntrack action context for execution. */
38struct ovs_conntrack_info {
39 struct nf_conntrack_zone zone;
40 struct nf_conn *ct;
41 u32 flags;
42 u16 family;
Joe Stringer182e3042015-08-26 11:31:49 -070043 struct md_mark mark;
Joe Stringer7f8a4362015-08-26 11:31:48 -070044};
45
46static u16 key_to_nfproto(const struct sw_flow_key *key)
47{
48 switch (ntohs(key->eth.type)) {
49 case ETH_P_IP:
50 return NFPROTO_IPV4;
51 case ETH_P_IPV6:
52 return NFPROTO_IPV6;
53 default:
54 return NFPROTO_UNSPEC;
55 }
56}
57
58/* Map SKB connection state into the values used by flow definition. */
59static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
60{
61 u8 ct_state = OVS_CS_F_TRACKED;
62
63 switch (ctinfo) {
64 case IP_CT_ESTABLISHED_REPLY:
65 case IP_CT_RELATED_REPLY:
66 case IP_CT_NEW_REPLY:
67 ct_state |= OVS_CS_F_REPLY_DIR;
68 break;
69 default:
70 break;
71 }
72
73 switch (ctinfo) {
74 case IP_CT_ESTABLISHED:
75 case IP_CT_ESTABLISHED_REPLY:
76 ct_state |= OVS_CS_F_ESTABLISHED;
77 break;
78 case IP_CT_RELATED:
79 case IP_CT_RELATED_REPLY:
80 ct_state |= OVS_CS_F_RELATED;
81 break;
82 case IP_CT_NEW:
83 case IP_CT_NEW_REPLY:
84 ct_state |= OVS_CS_F_NEW;
85 break;
86 default:
87 break;
88 }
89
90 return ct_state;
91}
92
93static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
Joe Stringer182e3042015-08-26 11:31:49 -070094 const struct nf_conntrack_zone *zone,
95 const struct nf_conn *ct)
Joe Stringer7f8a4362015-08-26 11:31:48 -070096{
97 key->ct.state = state;
98 key->ct.zone = zone->id;
Joe Stringer182e3042015-08-26 11:31:49 -070099 key->ct.mark = ct ? ct->mark : 0;
Joe Stringer7f8a4362015-08-26 11:31:48 -0700100}
101
102/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
103 * previously sent the packet to conntrack via the ct action.
104 */
105static void ovs_ct_update_key(const struct sk_buff *skb,
106 struct sw_flow_key *key, bool post_ct)
107{
108 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
109 enum ip_conntrack_info ctinfo;
110 struct nf_conn *ct;
111 u8 state = 0;
112
113 ct = nf_ct_get(skb, &ctinfo);
114 if (ct) {
115 state = ovs_ct_get_state(ctinfo);
116 if (ct->master)
117 state |= OVS_CS_F_RELATED;
118 zone = nf_ct_zone(ct);
119 } else if (post_ct) {
120 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
121 }
Joe Stringer182e3042015-08-26 11:31:49 -0700122 __ovs_ct_update_key(key, state, zone, ct);
Joe Stringer7f8a4362015-08-26 11:31:48 -0700123}
124
125void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
126{
127 ovs_ct_update_key(skb, key, false);
128}
129
130int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
131{
132 if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
133 return -EMSGSIZE;
134
135 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
136 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
137 return -EMSGSIZE;
138
Joe Stringer182e3042015-08-26 11:31:49 -0700139 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
140 nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
141 return -EMSGSIZE;
142
143 return 0;
144}
145
146static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
147 u32 ct_mark, u32 mask)
148{
149 enum ip_conntrack_info ctinfo;
150 struct nf_conn *ct;
151 u32 new_mark;
152
153 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK))
154 return -ENOTSUPP;
155
156 /* The connection could be invalid, in which case set_mark is no-op. */
157 ct = nf_ct_get(skb, &ctinfo);
158 if (!ct)
159 return 0;
160
161 new_mark = ct_mark | (ct->mark & ~(mask));
162 if (ct->mark != new_mark) {
163 ct->mark = new_mark;
164 nf_conntrack_event_cache(IPCT_MARK, ct);
165 key->ct.mark = new_mark;
166 }
167
Joe Stringer7f8a4362015-08-26 11:31:48 -0700168 return 0;
169}
170
171static int handle_fragments(struct net *net, struct sw_flow_key *key,
172 u16 zone, struct sk_buff *skb)
173{
174 struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
175
176 if (key->eth.type == htons(ETH_P_IP)) {
177 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
178 int err;
179
180 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
181 err = ip_defrag(skb, user);
182 if (err)
183 return err;
184
185 ovs_cb.mru = IPCB(skb)->frag_max_size;
186 } else if (key->eth.type == htons(ETH_P_IPV6)) {
187#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
188 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
189 struct sk_buff *reasm;
190
191 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
192 reasm = nf_ct_frag6_gather(skb, user);
193 if (!reasm)
194 return -EINPROGRESS;
195
196 if (skb == reasm)
197 return -EINVAL;
198
199 key->ip.proto = ipv6_hdr(reasm)->nexthdr;
200 skb_morph(skb, reasm);
201 consume_skb(reasm);
202 ovs_cb.mru = IP6CB(skb)->frag_max_size;
203#else
204 return -EPFNOSUPPORT;
205#endif
206 } else {
207 return -EPFNOSUPPORT;
208 }
209
210 key->ip.frag = OVS_FRAG_TYPE_NONE;
211 skb_clear_hash(skb);
212 skb->ignore_df = 1;
213 *OVS_CB(skb) = ovs_cb;
214
215 return 0;
216}
217
218static struct nf_conntrack_expect *
219ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
220 u16 proto, const struct sk_buff *skb)
221{
222 struct nf_conntrack_tuple tuple;
223
224 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
225 return NULL;
226 return __nf_ct_expect_find(net, zone, &tuple);
227}
228
229/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
230static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
231 const struct ovs_conntrack_info *info)
232{
233 enum ip_conntrack_info ctinfo;
234 struct nf_conn *ct;
235
236 ct = nf_ct_get(skb, &ctinfo);
237 if (!ct)
238 return false;
239 if (!net_eq(net, read_pnet(&ct->ct_net)))
240 return false;
241 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
242 return false;
243
244 return true;
245}
246
247static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
248 const struct ovs_conntrack_info *info,
249 struct sk_buff *skb)
250{
251 /* If we are recirculating packets to match on conntrack fields and
252 * committing with a separate conntrack action, then we don't need to
253 * actually run the packet through conntrack twice unless it's for a
254 * different zone.
255 */
256 if (!skb_nfct_cached(net, skb, info)) {
257 struct nf_conn *tmpl = info->ct;
258
259 /* Associate skb with specified zone. */
260 if (tmpl) {
261 if (skb->nfct)
262 nf_conntrack_put(skb->nfct);
263 nf_conntrack_get(&tmpl->ct_general);
264 skb->nfct = &tmpl->ct_general;
265 skb->nfctinfo = IP_CT_NEW;
266 }
267
268 if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING,
269 skb) != NF_ACCEPT)
270 return -ENOENT;
271 }
272
273 return 0;
274}
275
276/* Lookup connection and read fields into key. */
277static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
278 const struct ovs_conntrack_info *info,
279 struct sk_buff *skb)
280{
281 struct nf_conntrack_expect *exp;
282
283 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
284 if (exp) {
285 u8 state;
286
287 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
Joe Stringer182e3042015-08-26 11:31:49 -0700288 __ovs_ct_update_key(key, state, &info->zone, exp->master);
Joe Stringer7f8a4362015-08-26 11:31:48 -0700289 } else {
290 int err;
291
292 err = __ovs_ct_lookup(net, key, info, skb);
293 if (err)
294 return err;
295
296 ovs_ct_update_key(skb, key, true);
297 }
298
299 return 0;
300}
301
302/* Lookup connection and confirm if unconfirmed. */
303static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
304 const struct ovs_conntrack_info *info,
305 struct sk_buff *skb)
306{
307 u8 state;
308 int err;
309
310 state = key->ct.state;
311 if (key->ct.zone == info->zone.id &&
312 ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) {
313 /* Previous lookup has shown that this connection is already
314 * tracked and committed. Skip committing.
315 */
316 return 0;
317 }
318
319 err = __ovs_ct_lookup(net, key, info, skb);
320 if (err)
321 return err;
322 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
323 return -EINVAL;
324
325 ovs_ct_update_key(skb, key, true);
326
327 return 0;
328}
329
330int ovs_ct_execute(struct net *net, struct sk_buff *skb,
331 struct sw_flow_key *key,
332 const struct ovs_conntrack_info *info)
333{
334 int nh_ofs;
335 int err;
336
337 /* The conntrack module expects to be working at L3. */
338 nh_ofs = skb_network_offset(skb);
339 skb_pull(skb, nh_ofs);
340
341 if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
342 err = handle_fragments(net, key, info->zone.id, skb);
343 if (err)
344 return err;
345 }
346
347 if (info->flags & OVS_CT_F_COMMIT)
348 err = ovs_ct_commit(net, key, info, skb);
349 else
350 err = ovs_ct_lookup(net, key, info, skb);
Joe Stringer182e3042015-08-26 11:31:49 -0700351 if (err)
352 goto err;
Joe Stringer7f8a4362015-08-26 11:31:48 -0700353
Joe Stringer182e3042015-08-26 11:31:49 -0700354 if (info->mark.mask)
355 err = ovs_ct_set_mark(skb, key, info->mark.value,
356 info->mark.mask);
357err:
Joe Stringer7f8a4362015-08-26 11:31:48 -0700358 skb_push(skb, nh_ofs);
359 return err;
360}
361
362static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
363 [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32),
364 .maxlen = sizeof(u32) },
365 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
366 .maxlen = sizeof(u16) },
Joe Stringer182e3042015-08-26 11:31:49 -0700367 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
368 .maxlen = sizeof(struct md_mark) },
Joe Stringer7f8a4362015-08-26 11:31:48 -0700369};
370
371static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
372 bool log)
373{
374 struct nlattr *a;
375 int rem;
376
377 nla_for_each_nested(a, attr, rem) {
378 int type = nla_type(a);
379 int maxlen = ovs_ct_attr_lens[type].maxlen;
380 int minlen = ovs_ct_attr_lens[type].minlen;
381
382 if (type > OVS_CT_ATTR_MAX) {
383 OVS_NLERR(log,
384 "Unknown conntrack attr (type=%d, max=%d)",
385 type, OVS_CT_ATTR_MAX);
386 return -EINVAL;
387 }
388 if (nla_len(a) < minlen || nla_len(a) > maxlen) {
389 OVS_NLERR(log,
390 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
391 type, nla_len(a), maxlen);
392 return -EINVAL;
393 }
394
395 switch (type) {
396 case OVS_CT_ATTR_FLAGS:
397 info->flags = nla_get_u32(a);
398 break;
399#ifdef CONFIG_NF_CONNTRACK_ZONES
400 case OVS_CT_ATTR_ZONE:
401 info->zone.id = nla_get_u16(a);
402 break;
403#endif
Joe Stringer182e3042015-08-26 11:31:49 -0700404#ifdef CONFIG_NF_CONNTRACK_MARK
405 case OVS_CT_ATTR_MARK: {
406 struct md_mark *mark = nla_data(a);
407
408 info->mark = *mark;
409 break;
410 }
411#endif
Joe Stringer7f8a4362015-08-26 11:31:48 -0700412 default:
413 OVS_NLERR(log, "Unknown conntrack attr (%d)",
414 type);
415 return -EINVAL;
416 }
417 }
418
419 if (rem > 0) {
420 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
421 return -EINVAL;
422 }
423
424 return 0;
425}
426
427bool ovs_ct_verify(enum ovs_key_attr attr)
428{
429 if (attr == OVS_KEY_ATTR_CT_STATE)
430 return true;
431 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
432 attr == OVS_KEY_ATTR_CT_ZONE)
433 return true;
Joe Stringer182e3042015-08-26 11:31:49 -0700434 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
435 attr == OVS_KEY_ATTR_CT_MARK)
436 return true;
Joe Stringer7f8a4362015-08-26 11:31:48 -0700437
438 return false;
439}
440
441int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
442 const struct sw_flow_key *key,
443 struct sw_flow_actions **sfa, bool log)
444{
445 struct ovs_conntrack_info ct_info;
446 u16 family;
447 int err;
448
449 family = key_to_nfproto(key);
450 if (family == NFPROTO_UNSPEC) {
451 OVS_NLERR(log, "ct family unspecified");
452 return -EINVAL;
453 }
454
455 memset(&ct_info, 0, sizeof(ct_info));
456 ct_info.family = family;
457
458 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
459 NF_CT_DEFAULT_ZONE_DIR, 0);
460
461 err = parse_ct(attr, &ct_info, log);
462 if (err)
463 return err;
464
465 /* Set up template for tracking connections in specific zones. */
466 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
467 if (!ct_info.ct) {
468 OVS_NLERR(log, "Failed to allocate conntrack template");
469 return -ENOMEM;
470 }
471
472 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
473 sizeof(ct_info), log);
474 if (err)
475 goto err_free_ct;
476
477 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
478 nf_conntrack_get(&ct_info.ct->ct_general);
479 return 0;
480err_free_ct:
481 nf_conntrack_free(ct_info.ct);
482 return err;
483}
484
485int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
486 struct sk_buff *skb)
487{
488 struct nlattr *start;
489
490 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
491 if (!start)
492 return -EMSGSIZE;
493
494 if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags))
495 return -EMSGSIZE;
496 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
497 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
498 return -EMSGSIZE;
Joe Stringer182e3042015-08-26 11:31:49 -0700499 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
500 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
501 &ct_info->mark))
502 return -EMSGSIZE;
Joe Stringer7f8a4362015-08-26 11:31:48 -0700503
504 nla_nest_end(skb, start);
505
506 return 0;
507}
508
509void ovs_ct_free_action(const struct nlattr *a)
510{
511 struct ovs_conntrack_info *ct_info = nla_data(a);
512
513 if (ct_info->ct)
514 nf_ct_put(ct_info->ct);
515}