// SPDX-License-Identifier: GPL-2.0

#include <linux/bpf.h>
#include <linux/filter.h>
#include <net/net_namespace.h>

/*
 * Functions to manage BPF programs attached to netns
 */

struct bpf_netns_link {
	struct bpf_link link;
	enum bpf_attach_type type;
	enum netns_bpf_attach_type netns_type;

	/* We don't hold a ref to net in order to auto-detach the link
	 * when netns is going away. Instead we rely on pernet
	 * pre_exit callback to clear this pointer. Must be accessed
	 * with netns_bpf_mutex held.
	 */
	struct net *net;
};

/* Protects updates to netns_bpf */
DEFINE_MUTEX(netns_bpf_mutex);

/* Must be called with netns_bpf_mutex held. */
static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);

	net_link->net = NULL;
}

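/* Undo the attachment when the link is released: clear the link and
 * program pointers in the netns, unless the netns has already gone
 * away and auto-detached us through the pernet pre_exit callback.
 */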
static void bpf_netns_link_release(struct bpf_link *link)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	enum netns_bpf_attach_type type = net_link->netns_type;
	struct net *net;

	/* Link auto-detached by dying netns. */
	if (!net_link->net)
		return;

	mutex_lock(&netns_bpf_mutex);

	/* Recheck after potential sleep. We can race with cleanup_net
	 * here, but if we see a non-NULL struct net pointer pre_exit
	 * has not happened yet and will block on netns_bpf_mutex.
	 */
	net = net_link->net;
	if (!net)
		goto out_unlock;

	net->bpf.links[type] = NULL;
	RCU_INIT_POINTER(net->bpf.progs[type], NULL);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
}

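/* Free the link memory once the last reference to it is gone. */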
static void bpf_netns_link_dealloc(struct bpf_link *link)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);

	kfree(net_link);
}

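/* Swap the program attached through the link for new_prog. When
 * old_prog is given, the update only succeeds if old_prog is still the
 * program attached to the link. Fails with -ENOLINK once the link has
 * been auto-detached or the netns is dying.
 */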
static int bpf_netns_link_update_prog(struct bpf_link *link,
				      struct bpf_prog *new_prog,
				      struct bpf_prog *old_prog)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	enum netns_bpf_attach_type type = net_link->netns_type;
	struct net *net;
	int ret = 0;

	if (old_prog && old_prog != link->prog)
		return -EPERM;
	if (new_prog->type != link->prog->type)
		return -EINVAL;

	mutex_lock(&netns_bpf_mutex);

	net = net_link->net;
	if (!net || !check_net(net)) {
		/* Link auto-detached or netns dying */
		ret = -ENOLINK;
		goto out_unlock;
	}

	old_prog = xchg(&link->prog, new_prog);
	rcu_assign_pointer(net->bpf.progs[type], new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
	return ret;
}

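/* Report the target netns inode number and the attach type to user
 * space. A zero inode number means the link has been auto-detached or
 * the netns is going away.
 */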
static int bpf_netns_link_fill_info(const struct bpf_link *link,
				    struct bpf_link_info *info)
{
	const struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	unsigned int inum = 0;
	struct net *net;

	mutex_lock(&netns_bpf_mutex);
	net = net_link->net;
	if (net && check_net(net))
		inum = net->ns.inum;
	mutex_unlock(&netns_bpf_mutex);

	info->netns.netns_ino = inum;
	info->netns.attach_type = net_link->type;
	return 0;
}

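/* Expose the same information through the link's fdinfo file. */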
static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
				       struct seq_file *seq)
{
	struct bpf_link_info info = {};

	bpf_netns_link_fill_info(link, &info);
	seq_printf(seq,
		   "netns_ino:\t%u\n"
		   "attach_type:\t%u\n",
		   info.netns.netns_ino,
		   info.netns.attach_type);
}

static const struct bpf_link_ops bpf_netns_link_ops = {
	.release = bpf_netns_link_release,
	.dealloc = bpf_netns_link_dealloc,
	.update_prog = bpf_netns_link_update_prog,
	.fill_link_info = bpf_netns_link_fill_info,
	.show_fdinfo = bpf_netns_link_show_fdinfo,
};

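/* Handle BPF_PROG_QUERY for a netns attach point. At most one program
 * ID is reported, since only a single program can be attached per type.
 */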
int netns_bpf_prog_query(const union bpf_attr *attr,
			 union bpf_attr __user *uattr)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	u32 prog_id, prog_cnt = 0, flags = 0;
	enum netns_bpf_attach_type type;
	struct bpf_prog *attached;
	struct net *net;

	if (attr->query.query_flags)
		return -EINVAL;

	type = to_netns_bpf_attach_type(attr->query.attach_type);
	if (type < 0)
		return -EINVAL;

	net = get_net_ns_by_fd(attr->query.target_fd);
	if (IS_ERR(net))
		return PTR_ERR(net);

	rcu_read_lock();
	attached = rcu_dereference(net->bpf.progs[type]);
	if (attached) {
		prog_cnt = 1;
		prog_id = attached->aux->id;
	}
	rcu_read_unlock();

	put_net(net);

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
		return -EFAULT;

	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
		return 0;

	if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
		return -EFAULT;

	return 0;
}

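/* Handle BPF_PROG_ATTACH for the current netns. Attaching a program
 * directly is mutually exclusive with bpf_link attachments for the
 * same attach type.
 */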
int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	enum netns_bpf_attach_type type;
	struct bpf_prog *attached;
	struct net *net;
	int ret;

	type = to_netns_bpf_attach_type(attr->attach_type);
	if (type < 0)
		return -EINVAL;

	net = current->nsproxy->net_ns;
	mutex_lock(&netns_bpf_mutex);

	/* Attaching prog directly is not compatible with links */
	if (net->bpf.links[type]) {
		ret = -EEXIST;
		goto out_unlock;
	}

	switch (type) {
	case NETNS_BPF_FLOW_DISSECTOR:
		ret = flow_dissector_bpf_prog_attach_check(net, prog);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	if (ret)
		goto out_unlock;

	attached = rcu_dereference_protected(net->bpf.progs[type],
					     lockdep_is_held(&netns_bpf_mutex));
	if (attached == prog) {
		/* The same program cannot be attached twice */
		ret = -EINVAL;
		goto out_unlock;
	}
	rcu_assign_pointer(net->bpf.progs[type], prog);
	if (attached)
		bpf_prog_put(attached);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);

	return ret;
}

/* Must be called with netns_bpf_mutex held. */
static int __netns_bpf_prog_detach(struct net *net,
				   enum netns_bpf_attach_type type)
{
	struct bpf_prog *attached;

	/* Progs attached via links cannot be detached */
	if (net->bpf.links[type])
		return -EINVAL;

	attached = rcu_dereference_protected(net->bpf.progs[type],
					     lockdep_is_held(&netns_bpf_mutex));
	if (!attached)
		return -ENOENT;
	RCU_INIT_POINTER(net->bpf.progs[type], NULL);
	bpf_prog_put(attached);
	return 0;
}

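/* Handle BPF_PROG_DETACH for the current netns. */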
int netns_bpf_prog_detach(const union bpf_attr *attr)
{
	enum netns_bpf_attach_type type;
	int ret;

	type = to_netns_bpf_attach_type(attr->attach_type);
	if (type < 0)
		return -EINVAL;

	mutex_lock(&netns_bpf_mutex);
	ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type);
	mutex_unlock(&netns_bpf_mutex);

	return ret;
}

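/* Attach the link's program to the target netns. Only one link per
 * attach type is allowed for now, and links are not compatible with a
 * program attached directly via BPF_PROG_ATTACH.
 */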
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
				 enum netns_bpf_attach_type type)
{
	struct bpf_prog *prog;
	int err;

	mutex_lock(&netns_bpf_mutex);

	/* Allow attaching only one prog or link for now */
	if (net->bpf.links[type]) {
		err = -E2BIG;
		goto out_unlock;
	}
	/* Links are not compatible with attaching prog directly */
	prog = rcu_dereference_protected(net->bpf.progs[type],
					 lockdep_is_held(&netns_bpf_mutex));
	if (prog) {
		err = -EEXIST;
		goto out_unlock;
	}

	switch (type) {
	case NETNS_BPF_FLOW_DISSECTOR:
		err = flow_dissector_bpf_prog_attach_check(net, link->prog);
		break;
	default:
		err = -EINVAL;
		break;
	}
	if (err)
		goto out_unlock;

	rcu_assign_pointer(net->bpf.progs[type], link->prog);
	net->bpf.links[type] = link;

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
	return err;
}

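/* Handle BPF_LINK_CREATE for a netns attach point: allocate a
 * bpf_netns_link, attach it to the netns given by target_fd, and
 * return a new link fd to the caller.
 */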
int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
{
	enum netns_bpf_attach_type netns_type;
	struct bpf_link_primer link_primer;
	struct bpf_netns_link *net_link;
	enum bpf_attach_type type;
	struct net *net;
	int err;

	if (attr->link_create.flags)
		return -EINVAL;

	type = attr->link_create.attach_type;
	netns_type = to_netns_bpf_attach_type(type);
	if (netns_type < 0)
		return -EINVAL;

	net = get_net_ns_by_fd(attr->link_create.target_fd);
	if (IS_ERR(net))
		return PTR_ERR(net);

	net_link = kzalloc(sizeof(*net_link), GFP_USER);
	if (!net_link) {
		err = -ENOMEM;
		goto out_put_net;
	}
	bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
		      &bpf_netns_link_ops, prog);
	net_link->net = net;
	net_link->type = type;
	net_link->netns_type = netns_type;

	err = bpf_link_prime(&net_link->link, &link_primer);
	if (err) {
		kfree(net_link);
		goto out_put_net;
	}

	err = netns_bpf_link_attach(net, &net_link->link, netns_type);
	if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_net;
	}

	put_net(net);
	return bpf_link_settle(&link_primer);

out_put_net:
	put_net(net);
	return err;
}

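/* Pernet pre_exit callback: on netns teardown, clear the back-pointer
 * in an attached link so it auto-detaches, or drop a directly attached
 * program.
 */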
static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
{
	enum netns_bpf_attach_type type;
	struct bpf_link *link;

	mutex_lock(&netns_bpf_mutex);
	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
		link = net->bpf.links[type];
		if (link)
			bpf_netns_link_auto_detach(link);
		else
			__netns_bpf_prog_detach(net, type);
	}
	mutex_unlock(&netns_bpf_mutex);
}

static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
	.pre_exit = netns_bpf_pernet_pre_exit,
};

static int __init netns_bpf_init(void)
{
	return register_pernet_subsys(&netns_bpf_pernet_ops);
}

subsys_initcall(netns_bpf_init);