// SPDX-License-Identifier: GPL-2.0+
/*
 * IPv6 IOAM Lightweight Tunnel implementation
 *
 * Author:
 * Justin Iurman <justin.iurman@uliege.be>
 */

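/*
 * This lightweight tunnel (LWT) type pre-builds an IPv6 Hop-by-Hop header
 * carrying a pre-allocated IOAM Trace Option and attaches it to packets
 * matching a route: either by inserting it into the existing IPv6 header
 * (inline mode), by encapsulating the packet in an outer IPv6 header
 * (encap mode), or by choosing between the two automatically (auto mode).
 *
 * Illustrative iproute2 usage only; keywords may vary between iproute2
 * versions, see the kernel's ioam6 selftests for the reference syntax:
 *
 *   ip -6 route add db02::/64 encap ioam6 mode inline \
 *          trace prealloc type 0x800000 ns 123 size 12 dev eth0
 *
 *   ip -6 route add db02::/64 encap ioam6 mode encap tundst db01::2 \
 *          trace prealloc type 0x800000 ns 123 size 12 dev eth0
 */
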
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/ioam6.h>
#include <linux/ioam6_iptunnel.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/lwtunnel.h>
#include <net/ioam6.h>
#include <net/netlink.h>
#include <net/ipv6.h>
#include <net/dst_cache.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>

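/* Masks on the 24-bit trace type (stored in the upper bits of a 32-bit
 * word): SHORT selects the bits whose node data fields are 4 octets wide
 * (bits 0-7 and 11), WIDE selects the 8-octet fields (bits 8-10).
 */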
#define IOAM6_MASK_SHORT_FIELDS 0xff100000
#define IOAM6_MASK_WIDE_FIELDS 0xe00000

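/* Pre-built encapsulation block: a Hop-by-Hop extension header, a 2-octet
 * PadN TLV to keep the IOAM option 4n-aligned, the IOAM option header and
 * the pre-allocated trace header. The trace data space follows directly
 * after this structure in memory.
 */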
struct ioam6_lwt_encap {
        struct ipv6_hopopt_hdr eh;
        u8 pad[2];                      /* 2-octet padding for 4n-alignment */
        struct ioam6_hdr ioamh;
        struct ioam6_trace_hdr traceh;
} __packed;

struct ioam6_lwt {
        struct dst_cache cache;
        u8 mode;
        struct in6_addr tundst;
        struct ioam6_lwt_encap tuninfo;
};

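/* Accessors for the ioam6 state embedded in a generic lwtunnel_state */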
static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
{
        return (struct ioam6_lwt *)lwt->data;
}

static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
{
        return &ioam6_lwt_state(lwt)->tuninfo;
}

static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
{
        return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
}

static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
        [IOAM6_IPTUNNEL_MODE]   = NLA_POLICY_RANGE(NLA_U8,
                                                   IOAM6_IPTUNNEL_MODE_MIN,
                                                   IOAM6_IPTUNNEL_MODE_MAX),
        [IOAM6_IPTUNNEL_DST]    = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
        [IOAM6_IPTUNNEL_TRACE]  = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
};

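/* Reject an empty trace type, a zero or oversized pre-allocated data area,
 * and any of the unsupported trace type bits 12-21. On success, pre-compute
 * nodelen, i.e. the per-node data size in multiples of 4 octets, from the
 * requested fields.
 */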
static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
{
        u32 fields;

        if (!trace->type_be32 || !trace->remlen ||
            trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 ||
            trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
            trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
            trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
            trace->type.bit21)
                return false;

        trace->nodelen = 0;
        fields = be32_to_cpu(trace->type_be32);

        trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
                          * (sizeof(__be32) / 4);
        trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
                          * (sizeof(__be64) / 4);

        return true;
}

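/* Parse and validate the netlink configuration (mode, optional tunnel
 * destination, trace header), then allocate the lwtunnel state with room
 * for the pre-allocated trace data rounded up to 8 octets, and pre-build
 * the Hop-by-Hop + IOAM option that will be copied into outgoing packets.
 */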
static int ioam6_build_state(struct net *net, struct nlattr *nla,
                             unsigned int family, const void *cfg,
                             struct lwtunnel_state **ts,
                             struct netlink_ext_ack *extack)
{
        struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
        struct ioam6_lwt_encap *tuninfo;
        struct ioam6_trace_hdr *trace;
        struct lwtunnel_state *lwt;
        struct ioam6_lwt *ilwt;
        int len_aligned, err;
        u8 mode;

        if (family != AF_INET6)
                return -EINVAL;

        err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
                               ioam6_iptunnel_policy, extack);
        if (err < 0)
                return err;

        if (!tb[IOAM6_IPTUNNEL_MODE])
                mode = IOAM6_IPTUNNEL_MODE_INLINE;
        else
                mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);

        if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
                NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
                return -EINVAL;
        }

        if (!tb[IOAM6_IPTUNNEL_TRACE]) {
                NL_SET_ERR_MSG(extack, "missing trace");
                return -EINVAL;
        }

        trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
        if (!ioam6_validate_trace_hdr(trace)) {
                NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
                                    "invalid trace validation");
                return -EINVAL;
        }

        len_aligned = ALIGN(trace->remlen * 4, 8);
        lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned);
        if (!lwt)
                return -ENOMEM;

        ilwt = ioam6_lwt_state(lwt);
        err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
        if (err) {
                kfree(lwt);
                return err;
        }

        ilwt->mode = mode;
        if (tb[IOAM6_IPTUNNEL_DST])
                ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);

        tuninfo = ioam6_lwt_info(lwt);
        tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
        tuninfo->pad[0] = IPV6_TLV_PADN;
        tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
        tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
        tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
                                 + trace->remlen * 4;

        memcpy(&tuninfo->traceh, trace, sizeof(*trace));

        if (len_aligned - trace->remlen * 4) {
                tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
                tuninfo->traceh.data[trace->remlen * 4 + 1] = 2;
        }

        lwt->type = LWTUNNEL_ENCAP_IOAM6;
        lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;

        *ts = lwt;

        return 0;
}

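/* Locate the trace header inside the freshly inserted Hop-by-Hop option
 * and let the IOAM core fill in this node's data, provided the IOAM
 * namespace is known on this node.
 */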
static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
{
        struct ioam6_trace_hdr *trace;
        struct ioam6_namespace *ns;

        trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
                                           + sizeof(struct ipv6_hopopt_hdr) + 2
                                           + sizeof(struct ioam6_hdr));

        ns = ioam6_namespace(net, trace->namespace_id);
        if (ns)
                ioam6_fill_trace_data(skb, ns, trace, false);

        return 0;
}

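/* Inline mode: grow headroom, move the IPv6 header forward and insert the
 * pre-built Hop-by-Hop + IOAM option right behind it, then fix up nexthdr,
 * payload length and checksum.
 */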
static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
                           struct ioam6_lwt_encap *tuninfo)
{
        struct ipv6hdr *oldhdr, *hdr;
        int hdrlen, err;

        hdrlen = (tuninfo->eh.hdrlen + 1) << 3;

        err = skb_cow_head(skb, hdrlen + skb->mac_len);
        if (unlikely(err))
                return err;

        oldhdr = ipv6_hdr(skb);
        skb_pull(skb, sizeof(*oldhdr));
        skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));

        skb_push(skb, sizeof(*oldhdr) + hdrlen);
        skb_reset_network_header(skb);
        skb_mac_header_rebuild(skb);

        hdr = ipv6_hdr(skb);
        memmove(hdr, oldhdr, sizeof(*oldhdr));
        tuninfo->eh.nexthdr = hdr->nexthdr;

        skb_set_transport_header(skb, sizeof(*hdr));
        skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);

        memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);

        hdr->nexthdr = NEXTHDR_HOP;
        hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));

        return ioam6_do_fill(net, skb);
}

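/* Encap mode: prepend an outer IPv6 header (ip6ip6) followed by the
 * pre-built Hop-by-Hop + IOAM option. The outer destination is the
 * configured tunnel endpoint and the source is selected from the egress
 * device.
 */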
static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
                          struct ioam6_lwt_encap *tuninfo,
                          struct in6_addr *tundst)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr, *inner_hdr;
        int hdrlen, len, err;

        hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
        len = sizeof(*hdr) + hdrlen;

        err = skb_cow_head(skb, len + skb->mac_len);
        if (unlikely(err))
                return err;

        inner_hdr = ipv6_hdr(skb);

        skb_push(skb, len);
        skb_reset_network_header(skb);
        skb_mac_header_rebuild(skb);
        skb_set_transport_header(skb, sizeof(*hdr));

        tuninfo->eh.nexthdr = NEXTHDR_IPV6;
        memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);

        hdr = ipv6_hdr(skb);
        memcpy(hdr, inner_hdr, sizeof(*hdr));

        hdr->nexthdr = NEXTHDR_HOP;
        hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
        hdr->daddr = *tundst;
        ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
                           IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);

        skb_postpush_rcsum(skb, hdr, len);

        return ioam6_do_fill(net, skb);
}

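/* lwtunnel output hook: apply the configured mode to the packet. When the
 * destination address was changed (encap), look up a new route, via a
 * per-state dst cache, and hand the packet to dst_output(); otherwise fall
 * through to the original output handler.
 */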
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct in6_addr orig_daddr;
        struct ioam6_lwt *ilwt;
        int err = -EINVAL;

        if (skb->protocol != htons(ETH_P_IPV6))
                goto drop;

        ilwt = ioam6_lwt_state(dst->lwtstate);
        orig_daddr = ipv6_hdr(skb)->daddr;

        switch (ilwt->mode) {
        case IOAM6_IPTUNNEL_MODE_INLINE:
do_inline:
                /* Direct insertion - if there is no Hop-by-Hop yet */
                if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
                        goto out;

                err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
                if (unlikely(err))
                        goto drop;

                break;
        case IOAM6_IPTUNNEL_MODE_ENCAP:
do_encap:
                /* Encapsulation (ip6ip6) */
                err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
                if (unlikely(err))
                        goto drop;

                break;
        case IOAM6_IPTUNNEL_MODE_AUTO:
                /* Automatic (RFC8200 compliant):
                 *  - local packets -> INLINE mode
                 *  - in-transit packets -> ENCAP mode
                 */
                if (!skb->dev)
                        goto do_inline;

                goto do_encap;
        default:
                goto drop;
        }

        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
        if (unlikely(err))
                goto drop;

        if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
                preempt_disable();
                dst = dst_cache_get(&ilwt->cache);
                preempt_enable();

                if (unlikely(!dst)) {
                        struct ipv6hdr *hdr = ipv6_hdr(skb);
                        struct flowi6 fl6;

                        memset(&fl6, 0, sizeof(fl6));
                        fl6.daddr = hdr->daddr;
                        fl6.saddr = hdr->saddr;
                        fl6.flowlabel = ip6_flowinfo(hdr);
                        fl6.flowi6_mark = skb->mark;
                        fl6.flowi6_proto = hdr->nexthdr;

                        dst = ip6_route_output(net, NULL, &fl6);
                        if (dst->error) {
                                err = dst->error;
                                dst_release(dst);
                                goto drop;
                        }

                        preempt_disable();
                        dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
                        preempt_enable();
                }

                skb_dst_drop(skb);
                skb_dst_set(skb, dst);

                return dst_output(net, sk, skb);
        }
out:
        return dst->lwtstate->orig_output(net, sk, skb);
drop:
        kfree_skb(skb);
        return err;
}

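/* Release the dst cache when the lwtunnel state is destroyed */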
static void ioam6_destroy_state(struct lwtunnel_state *lwt)
{
        dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
}

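/* Dump the current configuration (mode, tunnel destination for non-inline
 * modes, trace header) back to userspace.
 */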
static int ioam6_fill_encap_info(struct sk_buff *skb,
                                 struct lwtunnel_state *lwtstate)
{
        struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
        int err;

        err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
        if (err)
                goto ret;

        if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
                err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
                if (err)
                        goto ret;
        }

        err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh),
                      &ilwt->tuninfo.traceh);
ret:
        return err;
}

static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
        struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
        int nlsize;

        nlsize = nla_total_size(sizeof(ilwt->mode)) +
                 nla_total_size(sizeof(ilwt->tuninfo.traceh));

        if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
                nlsize += nla_total_size(sizeof(ilwt->tundst));

        return nlsize;
}

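/* Two IOAM6 lwtunnel states compare equal when their mode, tunnel
 * destination (for non-inline modes) and trace namespace id all match.
 */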
static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
        struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a);
        struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b);
        struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
        struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);

        return (ilwt_a->mode != ilwt_b->mode ||
                (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
                 !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
                trace_a->namespace_id != trace_b->namespace_id);
}

static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
        .build_state    = ioam6_build_state,
        .destroy_state  = ioam6_destroy_state,
        .output         = ioam6_output,
        .fill_encap     = ioam6_fill_encap_info,
        .get_encap_size = ioam6_encap_nlsize,
        .cmp_encap      = ioam6_encap_cmp,
        .owner          = THIS_MODULE,
};

int __init ioam6_iptunnel_init(void)
{
        return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
}

void ioam6_iptunnel_exit(void)
{
        lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
}