/*
 * Copyright (c) 2015 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
13
14#include <linux/module.h>
15#include <linux/openvswitch.h>
16#include <net/ip.h>
17#include <net/netfilter/nf_conntrack_core.h>
18#include <net/netfilter/nf_conntrack_zones.h>
19#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
20
21#include "datapath.h"
22#include "conntrack.h"
23#include "flow.h"
24#include "flow_netlink.h"
25
/* Accepted payload-length range for one conntrack netlink attribute type. */
struct ovs_ct_len_tbl {
	/* Largest payload length, in bytes, that is accepted. */
	size_t maxlen;
	/* Smallest payload length, in bytes, that is accepted. */
	size_t minlen;
};
30
31/* Conntrack action context for execution. */
32struct ovs_conntrack_info {
33 struct nf_conntrack_zone zone;
34 struct nf_conn *ct;
35 u32 flags;
36 u16 family;
37};
38
39static u16 key_to_nfproto(const struct sw_flow_key *key)
40{
41 switch (ntohs(key->eth.type)) {
42 case ETH_P_IP:
43 return NFPROTO_IPV4;
44 case ETH_P_IPV6:
45 return NFPROTO_IPV6;
46 default:
47 return NFPROTO_UNSPEC;
48 }
49}
50
51/* Map SKB connection state into the values used by flow definition. */
52static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
53{
54 u8 ct_state = OVS_CS_F_TRACKED;
55
56 switch (ctinfo) {
57 case IP_CT_ESTABLISHED_REPLY:
58 case IP_CT_RELATED_REPLY:
59 case IP_CT_NEW_REPLY:
60 ct_state |= OVS_CS_F_REPLY_DIR;
61 break;
62 default:
63 break;
64 }
65
66 switch (ctinfo) {
67 case IP_CT_ESTABLISHED:
68 case IP_CT_ESTABLISHED_REPLY:
69 ct_state |= OVS_CS_F_ESTABLISHED;
70 break;
71 case IP_CT_RELATED:
72 case IP_CT_RELATED_REPLY:
73 ct_state |= OVS_CS_F_RELATED;
74 break;
75 case IP_CT_NEW:
76 case IP_CT_NEW_REPLY:
77 ct_state |= OVS_CS_F_NEW;
78 break;
79 default:
80 break;
81 }
82
83 return ct_state;
84}
85
86static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
87 const struct nf_conntrack_zone *zone)
88{
89 key->ct.state = state;
90 key->ct.zone = zone->id;
91}
92
93/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
94 * previously sent the packet to conntrack via the ct action.
95 */
96static void ovs_ct_update_key(const struct sk_buff *skb,
97 struct sw_flow_key *key, bool post_ct)
98{
99 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
100 enum ip_conntrack_info ctinfo;
101 struct nf_conn *ct;
102 u8 state = 0;
103
104 ct = nf_ct_get(skb, &ctinfo);
105 if (ct) {
106 state = ovs_ct_get_state(ctinfo);
107 if (ct->master)
108 state |= OVS_CS_F_RELATED;
109 zone = nf_ct_zone(ct);
110 } else if (post_ct) {
111 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
112 }
113 __ovs_ct_update_key(key, state, zone);
114}
115
116void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
117{
118 ovs_ct_update_key(skb, key, false);
119}
120
121int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
122{
123 if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
124 return -EMSGSIZE;
125
126 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
127 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
128 return -EMSGSIZE;
129
130 return 0;
131}
132
133static int handle_fragments(struct net *net, struct sw_flow_key *key,
134 u16 zone, struct sk_buff *skb)
135{
136 struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
137
138 if (key->eth.type == htons(ETH_P_IP)) {
139 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
140 int err;
141
142 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
143 err = ip_defrag(skb, user);
144 if (err)
145 return err;
146
147 ovs_cb.mru = IPCB(skb)->frag_max_size;
148 } else if (key->eth.type == htons(ETH_P_IPV6)) {
149#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
150 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
151 struct sk_buff *reasm;
152
153 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
154 reasm = nf_ct_frag6_gather(skb, user);
155 if (!reasm)
156 return -EINPROGRESS;
157
158 if (skb == reasm)
159 return -EINVAL;
160
161 key->ip.proto = ipv6_hdr(reasm)->nexthdr;
162 skb_morph(skb, reasm);
163 consume_skb(reasm);
164 ovs_cb.mru = IP6CB(skb)->frag_max_size;
165#else
166 return -EPFNOSUPPORT;
167#endif
168 } else {
169 return -EPFNOSUPPORT;
170 }
171
172 key->ip.frag = OVS_FRAG_TYPE_NONE;
173 skb_clear_hash(skb);
174 skb->ignore_df = 1;
175 *OVS_CB(skb) = ovs_cb;
176
177 return 0;
178}
179
180static struct nf_conntrack_expect *
181ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
182 u16 proto, const struct sk_buff *skb)
183{
184 struct nf_conntrack_tuple tuple;
185
186 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
187 return NULL;
188 return __nf_ct_expect_find(net, zone, &tuple);
189}
190
191/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
192static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
193 const struct ovs_conntrack_info *info)
194{
195 enum ip_conntrack_info ctinfo;
196 struct nf_conn *ct;
197
198 ct = nf_ct_get(skb, &ctinfo);
199 if (!ct)
200 return false;
201 if (!net_eq(net, read_pnet(&ct->ct_net)))
202 return false;
203 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
204 return false;
205
206 return true;
207}
208
209static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
210 const struct ovs_conntrack_info *info,
211 struct sk_buff *skb)
212{
213 /* If we are recirculating packets to match on conntrack fields and
214 * committing with a separate conntrack action, then we don't need to
215 * actually run the packet through conntrack twice unless it's for a
216 * different zone.
217 */
218 if (!skb_nfct_cached(net, skb, info)) {
219 struct nf_conn *tmpl = info->ct;
220
221 /* Associate skb with specified zone. */
222 if (tmpl) {
223 if (skb->nfct)
224 nf_conntrack_put(skb->nfct);
225 nf_conntrack_get(&tmpl->ct_general);
226 skb->nfct = &tmpl->ct_general;
227 skb->nfctinfo = IP_CT_NEW;
228 }
229
230 if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING,
231 skb) != NF_ACCEPT)
232 return -ENOENT;
233 }
234
235 return 0;
236}
237
238/* Lookup connection and read fields into key. */
239static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
240 const struct ovs_conntrack_info *info,
241 struct sk_buff *skb)
242{
243 struct nf_conntrack_expect *exp;
244
245 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
246 if (exp) {
247 u8 state;
248
249 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
250 __ovs_ct_update_key(key, state, &info->zone);
251 } else {
252 int err;
253
254 err = __ovs_ct_lookup(net, key, info, skb);
255 if (err)
256 return err;
257
258 ovs_ct_update_key(skb, key, true);
259 }
260
261 return 0;
262}
263
264/* Lookup connection and confirm if unconfirmed. */
265static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
266 const struct ovs_conntrack_info *info,
267 struct sk_buff *skb)
268{
269 u8 state;
270 int err;
271
272 state = key->ct.state;
273 if (key->ct.zone == info->zone.id &&
274 ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) {
275 /* Previous lookup has shown that this connection is already
276 * tracked and committed. Skip committing.
277 */
278 return 0;
279 }
280
281 err = __ovs_ct_lookup(net, key, info, skb);
282 if (err)
283 return err;
284 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
285 return -EINVAL;
286
287 ovs_ct_update_key(skb, key, true);
288
289 return 0;
290}
291
292int ovs_ct_execute(struct net *net, struct sk_buff *skb,
293 struct sw_flow_key *key,
294 const struct ovs_conntrack_info *info)
295{
296 int nh_ofs;
297 int err;
298
299 /* The conntrack module expects to be working at L3. */
300 nh_ofs = skb_network_offset(skb);
301 skb_pull(skb, nh_ofs);
302
303 if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
304 err = handle_fragments(net, key, info->zone.id, skb);
305 if (err)
306 return err;
307 }
308
309 if (info->flags & OVS_CT_F_COMMIT)
310 err = ovs_ct_commit(net, key, info, skb);
311 else
312 err = ovs_ct_lookup(net, key, info, skb);
313
314 skb_push(skb, nh_ofs);
315 return err;
316}
317
318static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
319 [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32),
320 .maxlen = sizeof(u32) },
321 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
322 .maxlen = sizeof(u16) },
323};
324
325static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
326 bool log)
327{
328 struct nlattr *a;
329 int rem;
330
331 nla_for_each_nested(a, attr, rem) {
332 int type = nla_type(a);
333 int maxlen = ovs_ct_attr_lens[type].maxlen;
334 int minlen = ovs_ct_attr_lens[type].minlen;
335
336 if (type > OVS_CT_ATTR_MAX) {
337 OVS_NLERR(log,
338 "Unknown conntrack attr (type=%d, max=%d)",
339 type, OVS_CT_ATTR_MAX);
340 return -EINVAL;
341 }
342 if (nla_len(a) < minlen || nla_len(a) > maxlen) {
343 OVS_NLERR(log,
344 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
345 type, nla_len(a), maxlen);
346 return -EINVAL;
347 }
348
349 switch (type) {
350 case OVS_CT_ATTR_FLAGS:
351 info->flags = nla_get_u32(a);
352 break;
353#ifdef CONFIG_NF_CONNTRACK_ZONES
354 case OVS_CT_ATTR_ZONE:
355 info->zone.id = nla_get_u16(a);
356 break;
357#endif
358 default:
359 OVS_NLERR(log, "Unknown conntrack attr (%d)",
360 type);
361 return -EINVAL;
362 }
363 }
364
365 if (rem > 0) {
366 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
367 return -EINVAL;
368 }
369
370 return 0;
371}
372
373bool ovs_ct_verify(enum ovs_key_attr attr)
374{
375 if (attr == OVS_KEY_ATTR_CT_STATE)
376 return true;
377 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
378 attr == OVS_KEY_ATTR_CT_ZONE)
379 return true;
380
381 return false;
382}
383
384int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
385 const struct sw_flow_key *key,
386 struct sw_flow_actions **sfa, bool log)
387{
388 struct ovs_conntrack_info ct_info;
389 u16 family;
390 int err;
391
392 family = key_to_nfproto(key);
393 if (family == NFPROTO_UNSPEC) {
394 OVS_NLERR(log, "ct family unspecified");
395 return -EINVAL;
396 }
397
398 memset(&ct_info, 0, sizeof(ct_info));
399 ct_info.family = family;
400
401 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
402 NF_CT_DEFAULT_ZONE_DIR, 0);
403
404 err = parse_ct(attr, &ct_info, log);
405 if (err)
406 return err;
407
408 /* Set up template for tracking connections in specific zones. */
409 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
410 if (!ct_info.ct) {
411 OVS_NLERR(log, "Failed to allocate conntrack template");
412 return -ENOMEM;
413 }
414
415 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
416 sizeof(ct_info), log);
417 if (err)
418 goto err_free_ct;
419
420 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
421 nf_conntrack_get(&ct_info.ct->ct_general);
422 return 0;
423err_free_ct:
424 nf_conntrack_free(ct_info.ct);
425 return err;
426}
427
428int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
429 struct sk_buff *skb)
430{
431 struct nlattr *start;
432
433 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
434 if (!start)
435 return -EMSGSIZE;
436
437 if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags))
438 return -EMSGSIZE;
439 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
440 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
441 return -EMSGSIZE;
442
443 nla_nest_end(skb, start);
444
445 return 0;
446}
447
448void ovs_ct_free_action(const struct nlattr *a)
449{
450 struct ovs_conntrack_info *ct_info = nla_data(a);
451
452 if (ct_info->ct)
453 nf_ct_put(ct_info->ct);
454}