/*
 * Minimal file system backend for holding eBPF maps and programs,
 * used by bpf(2) object pinning.
 *
 * Authors:
 *
 *	Daniel Borkmann <daniel@iogearbox.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 */
| 13 | |
Paul Gortmaker | a536a6e | 2016-07-11 12:51:01 -0400 | [diff] [blame] | 14 | #include <linux/init.h> |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 15 | #include <linux/magic.h> |
| 16 | #include <linux/major.h> |
| 17 | #include <linux/mount.h> |
| 18 | #include <linux/namei.h> |
| 19 | #include <linux/fs.h> |
| 20 | #include <linux/kdev_t.h> |
| 21 | #include <linux/filter.h> |
| 22 | #include <linux/bpf.h> |
| 23 | |
/*
 * Kind of BPF object held by an inode of this filesystem.  The value is
 * also carried in the minor number of the dev_t passed to ->mknod when an
 * object is pinned (see bpf_obj_do_pin() and bpf_mkobj()).
 */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,	/* unknown / not a BPF object */
	BPF_TYPE_PROG	= 1,	/* eBPF program */
	BPF_TYPE_MAP	= 2,	/* eBPF map */
};
| 29 | |
| 30 | static void *bpf_any_get(void *raw, enum bpf_type type) |
| 31 | { |
| 32 | switch (type) { |
| 33 | case BPF_TYPE_PROG: |
Alexei Starovoitov | 92117d8 | 2016-04-27 18:56:20 -0700 | [diff] [blame] | 34 | raw = bpf_prog_inc(raw); |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 35 | break; |
| 36 | case BPF_TYPE_MAP: |
Alexei Starovoitov | 92117d8 | 2016-04-27 18:56:20 -0700 | [diff] [blame] | 37 | raw = bpf_map_inc(raw, true); |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 38 | break; |
| 39 | default: |
| 40 | WARN_ON_ONCE(1); |
| 41 | break; |
| 42 | } |
| 43 | |
| 44 | return raw; |
| 45 | } |
| 46 | |
| 47 | static void bpf_any_put(void *raw, enum bpf_type type) |
| 48 | { |
| 49 | switch (type) { |
| 50 | case BPF_TYPE_PROG: |
| 51 | bpf_prog_put(raw); |
| 52 | break; |
| 53 | case BPF_TYPE_MAP: |
Daniel Borkmann | c9da161 | 2015-11-24 21:28:15 +0100 | [diff] [blame] | 54 | bpf_map_put_with_uref(raw); |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 55 | break; |
| 56 | default: |
| 57 | WARN_ON_ONCE(1); |
| 58 | break; |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) |
| 63 | { |
| 64 | void *raw; |
| 65 | |
| 66 | *type = BPF_TYPE_MAP; |
Daniel Borkmann | c9da161 | 2015-11-24 21:28:15 +0100 | [diff] [blame] | 67 | raw = bpf_map_get_with_uref(ufd); |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 68 | if (IS_ERR(raw)) { |
| 69 | *type = BPF_TYPE_PROG; |
| 70 | raw = bpf_prog_get(ufd); |
| 71 | } |
| 72 | |
| 73 | return raw; |
| 74 | } |
| 75 | |
| 76 | static const struct inode_operations bpf_dir_iops; |
| 77 | |
| 78 | static const struct inode_operations bpf_prog_iops = { }; |
| 79 | static const struct inode_operations bpf_map_iops = { }; |
| 80 | |
| 81 | static struct inode *bpf_get_inode(struct super_block *sb, |
| 82 | const struct inode *dir, |
| 83 | umode_t mode) |
| 84 | { |
| 85 | struct inode *inode; |
| 86 | |
| 87 | switch (mode & S_IFMT) { |
| 88 | case S_IFDIR: |
| 89 | case S_IFREG: |
| 90 | break; |
| 91 | default: |
| 92 | return ERR_PTR(-EINVAL); |
| 93 | } |
| 94 | |
| 95 | inode = new_inode(sb); |
| 96 | if (!inode) |
| 97 | return ERR_PTR(-ENOSPC); |
| 98 | |
| 99 | inode->i_ino = get_next_ino(); |
Deepa Dinamani | 078cd82 | 2016-09-14 07:48:04 -0700 | [diff] [blame^] | 100 | inode->i_atime = current_time(inode); |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 101 | inode->i_mtime = inode->i_atime; |
| 102 | inode->i_ctime = inode->i_atime; |
| 103 | |
| 104 | inode_init_owner(inode, dir, mode); |
| 105 | |
| 106 | return inode; |
| 107 | } |
| 108 | |
| 109 | static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) |
| 110 | { |
| 111 | *type = BPF_TYPE_UNSPEC; |
| 112 | if (inode->i_op == &bpf_prog_iops) |
| 113 | *type = BPF_TYPE_PROG; |
| 114 | else if (inode->i_op == &bpf_map_iops) |
| 115 | *type = BPF_TYPE_MAP; |
| 116 | else |
| 117 | return -EACCES; |
| 118 | |
| 119 | return 0; |
| 120 | } |
| 121 | |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 122 | static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
| 123 | { |
| 124 | struct inode *inode; |
| 125 | |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 126 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); |
| 127 | if (IS_ERR(inode)) |
| 128 | return PTR_ERR(inode); |
| 129 | |
| 130 | inode->i_op = &bpf_dir_iops; |
| 131 | inode->i_fop = &simple_dir_operations; |
| 132 | |
| 133 | inc_nlink(inode); |
| 134 | inc_nlink(dir); |
| 135 | |
| 136 | d_instantiate(dentry, inode); |
| 137 | dget(dentry); |
| 138 | |
| 139 | return 0; |
| 140 | } |
| 141 | |
| 142 | static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, |
| 143 | umode_t mode, const struct inode_operations *iops) |
| 144 | { |
| 145 | struct inode *inode; |
| 146 | |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 147 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); |
| 148 | if (IS_ERR(inode)) |
| 149 | return PTR_ERR(inode); |
| 150 | |
| 151 | inode->i_op = iops; |
| 152 | inode->i_private = dentry->d_fsdata; |
| 153 | |
| 154 | d_instantiate(dentry, inode); |
| 155 | dget(dentry); |
| 156 | |
| 157 | return 0; |
| 158 | } |
| 159 | |
| 160 | static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, |
| 161 | dev_t devt) |
| 162 | { |
| 163 | enum bpf_type type = MINOR(devt); |
| 164 | |
| 165 | if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || |
| 166 | dentry->d_fsdata == NULL) |
| 167 | return -EPERM; |
| 168 | |
| 169 | switch (type) { |
| 170 | case BPF_TYPE_PROG: |
| 171 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); |
| 172 | case BPF_TYPE_MAP: |
| 173 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); |
| 174 | default: |
| 175 | return -EPERM; |
| 176 | } |
| 177 | } |
| 178 | |
Al Viro | 0c93b7d | 2016-03-25 12:06:51 -0400 | [diff] [blame] | 179 | static struct dentry * |
| 180 | bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) |
Daniel Borkmann | bb35a6e | 2015-12-10 22:33:49 +0100 | [diff] [blame] | 181 | { |
Al Viro | 0c93b7d | 2016-03-25 12:06:51 -0400 | [diff] [blame] | 182 | if (strchr(dentry->d_name.name, '.')) |
| 183 | return ERR_PTR(-EPERM); |
| 184 | return simple_lookup(dir, dentry, flags); |
Daniel Borkmann | bb35a6e | 2015-12-10 22:33:49 +0100 | [diff] [blame] | 185 | } |
| 186 | |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 187 | static const struct inode_operations bpf_dir_iops = { |
Al Viro | 0c93b7d | 2016-03-25 12:06:51 -0400 | [diff] [blame] | 188 | .lookup = bpf_lookup, |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 189 | .mknod = bpf_mkobj, |
| 190 | .mkdir = bpf_mkdir, |
| 191 | .rmdir = simple_rmdir, |
Al Viro | 0c93b7d | 2016-03-25 12:06:51 -0400 | [diff] [blame] | 192 | .rename = simple_rename, |
| 193 | .link = simple_link, |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 194 | .unlink = simple_unlink, |
| 195 | }; |
| 196 | |
| 197 | static int bpf_obj_do_pin(const struct filename *pathname, void *raw, |
| 198 | enum bpf_type type) |
| 199 | { |
| 200 | struct dentry *dentry; |
| 201 | struct inode *dir; |
| 202 | struct path path; |
| 203 | umode_t mode; |
| 204 | dev_t devt; |
| 205 | int ret; |
| 206 | |
| 207 | dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); |
| 208 | if (IS_ERR(dentry)) |
| 209 | return PTR_ERR(dentry); |
| 210 | |
| 211 | mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); |
| 212 | devt = MKDEV(UNNAMED_MAJOR, type); |
| 213 | |
| 214 | ret = security_path_mknod(&path, dentry, mode, devt); |
| 215 | if (ret) |
| 216 | goto out; |
| 217 | |
| 218 | dir = d_inode(path.dentry); |
| 219 | if (dir->i_op != &bpf_dir_iops) { |
| 220 | ret = -EPERM; |
| 221 | goto out; |
| 222 | } |
| 223 | |
| 224 | dentry->d_fsdata = raw; |
| 225 | ret = vfs_mknod(dir, dentry, mode, devt); |
| 226 | dentry->d_fsdata = NULL; |
| 227 | out: |
| 228 | done_path_create(&path, dentry); |
| 229 | return ret; |
| 230 | } |
| 231 | |
| 232 | int bpf_obj_pin_user(u32 ufd, const char __user *pathname) |
| 233 | { |
| 234 | struct filename *pname; |
| 235 | enum bpf_type type; |
| 236 | void *raw; |
| 237 | int ret; |
| 238 | |
| 239 | pname = getname(pathname); |
| 240 | if (IS_ERR(pname)) |
| 241 | return PTR_ERR(pname); |
| 242 | |
| 243 | raw = bpf_fd_probe_obj(ufd, &type); |
| 244 | if (IS_ERR(raw)) { |
| 245 | ret = PTR_ERR(raw); |
| 246 | goto out; |
| 247 | } |
| 248 | |
| 249 | ret = bpf_obj_do_pin(pname, raw, type); |
| 250 | if (ret != 0) |
| 251 | bpf_any_put(raw, type); |
| 252 | out: |
| 253 | putname(pname); |
| 254 | return ret; |
| 255 | } |
| 256 | |
| 257 | static void *bpf_obj_do_get(const struct filename *pathname, |
| 258 | enum bpf_type *type) |
| 259 | { |
| 260 | struct inode *inode; |
| 261 | struct path path; |
| 262 | void *raw; |
| 263 | int ret; |
| 264 | |
| 265 | ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); |
| 266 | if (ret) |
| 267 | return ERR_PTR(ret); |
| 268 | |
| 269 | inode = d_backing_inode(path.dentry); |
| 270 | ret = inode_permission(inode, MAY_WRITE); |
| 271 | if (ret) |
| 272 | goto out; |
| 273 | |
| 274 | ret = bpf_inode_type(inode, type); |
| 275 | if (ret) |
| 276 | goto out; |
| 277 | |
| 278 | raw = bpf_any_get(inode->i_private, *type); |
Alexei Starovoitov | 92117d8 | 2016-04-27 18:56:20 -0700 | [diff] [blame] | 279 | if (!IS_ERR(raw)) |
| 280 | touch_atime(&path); |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 281 | |
| 282 | path_put(&path); |
| 283 | return raw; |
| 284 | out: |
| 285 | path_put(&path); |
| 286 | return ERR_PTR(ret); |
| 287 | } |
| 288 | |
| 289 | int bpf_obj_get_user(const char __user *pathname) |
| 290 | { |
| 291 | enum bpf_type type = BPF_TYPE_UNSPEC; |
| 292 | struct filename *pname; |
| 293 | int ret = -ENOENT; |
| 294 | void *raw; |
| 295 | |
| 296 | pname = getname(pathname); |
| 297 | if (IS_ERR(pname)) |
| 298 | return PTR_ERR(pname); |
| 299 | |
| 300 | raw = bpf_obj_do_get(pname, &type); |
| 301 | if (IS_ERR(raw)) { |
| 302 | ret = PTR_ERR(raw); |
| 303 | goto out; |
| 304 | } |
| 305 | |
| 306 | if (type == BPF_TYPE_PROG) |
| 307 | ret = bpf_prog_new_fd(raw); |
| 308 | else if (type == BPF_TYPE_MAP) |
| 309 | ret = bpf_map_new_fd(raw); |
| 310 | else |
| 311 | goto out; |
| 312 | |
| 313 | if (ret < 0) |
| 314 | bpf_any_put(raw, type); |
| 315 | out: |
| 316 | putname(pname); |
| 317 | return ret; |
| 318 | } |
| 319 | |
| 320 | static void bpf_evict_inode(struct inode *inode) |
| 321 | { |
| 322 | enum bpf_type type; |
| 323 | |
| 324 | truncate_inode_pages_final(&inode->i_data); |
| 325 | clear_inode(inode); |
| 326 | |
| 327 | if (!bpf_inode_type(inode, &type)) |
| 328 | bpf_any_put(inode->i_private, type); |
| 329 | } |
| 330 | |
| 331 | static const struct super_operations bpf_super_ops = { |
| 332 | .statfs = simple_statfs, |
| 333 | .drop_inode = generic_delete_inode, |
| 334 | .evict_inode = bpf_evict_inode, |
| 335 | }; |
| 336 | |
| 337 | static int bpf_fill_super(struct super_block *sb, void *data, int silent) |
| 338 | { |
| 339 | static struct tree_descr bpf_rfiles[] = { { "" } }; |
| 340 | struct inode *inode; |
| 341 | int ret; |
| 342 | |
| 343 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); |
| 344 | if (ret) |
| 345 | return ret; |
| 346 | |
| 347 | sb->s_op = &bpf_super_ops; |
| 348 | |
| 349 | inode = sb->s_root->d_inode; |
| 350 | inode->i_op = &bpf_dir_iops; |
| 351 | inode->i_mode &= ~S_IALLUGO; |
| 352 | inode->i_mode |= S_ISVTX | S_IRWXUGO; |
| 353 | |
| 354 | return 0; |
| 355 | } |
| 356 | |
| 357 | static struct dentry *bpf_mount(struct file_system_type *type, int flags, |
| 358 | const char *dev_name, void *data) |
| 359 | { |
Eric W. Biederman | e27f4a9 | 2016-05-20 17:22:48 -0500 | [diff] [blame] | 360 | return mount_nodev(type, flags, data, bpf_fill_super); |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 361 | } |
| 362 | |
| 363 | static struct file_system_type bpf_fs_type = { |
| 364 | .owner = THIS_MODULE, |
| 365 | .name = "bpf", |
| 366 | .mount = bpf_mount, |
| 367 | .kill_sb = kill_litter_super, |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 368 | }; |
| 369 | |
Daniel Borkmann | b219775 | 2015-10-29 14:58:09 +0100 | [diff] [blame] | 370 | static int __init bpf_init(void) |
| 371 | { |
| 372 | int ret; |
| 373 | |
| 374 | ret = sysfs_create_mount_point(fs_kobj, "bpf"); |
| 375 | if (ret) |
| 376 | return ret; |
| 377 | |
| 378 | ret = register_filesystem(&bpf_fs_type); |
| 379 | if (ret) |
| 380 | sysfs_remove_mount_point(fs_kobj, "bpf"); |
| 381 | |
| 382 | return ret; |
| 383 | } |
| 384 | fs_initcall(bpf_init); |