blob: 262a321f58a63bc264a9cc1548a0cc6883167082 [file] [log] [blame]
Thomas Gleixner5b497af2019-05-29 07:18:09 -07001// SPDX-License-Identifier: GPL-2.0-only
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -08002/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
Alexei Starovoitov81ed18a2017-03-15 18:26:42 -07003 * Copyright (c) 2016,2017 Facebook
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -08004 */
5#include <linux/bpf.h>
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -07006#include <linux/btf.h>
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -08007#include <linux/err.h>
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -08008#include <linux/slab.h>
9#include <linux/mm.h>
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -070010#include <linux/filter.h>
Daniel Borkmann0cdf56402015-10-02 18:42:00 +020011#include <linux/perf_event.h>
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -070012#include <uapi/linux/btf.h>
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -080013
Martin KaFai Lau56f668d2017-03-22 10:00:33 -070014#include "map_in_map.h"
15
/* map_flags bits that array_map_alloc_check() lets through; any other
 * bit makes map creation fail with -EINVAL.
 */
#define ARRAY_CREATE_FLAG_MASK	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
Chenbo Feng6e71b042017-10-18 13:00:22 -070018
Alexei Starovoitova10423b2016-02-01 22:39:54 -080019static void bpf_array_free_percpu(struct bpf_array *array)
20{
21 int i;
22
Eric Dumazet32fff232018-02-22 08:33:24 -080023 for (i = 0; i < array->map.max_entries; i++) {
Alexei Starovoitova10423b2016-02-01 22:39:54 -080024 free_percpu(array->pptrs[i]);
Eric Dumazet32fff232018-02-22 08:33:24 -080025 cond_resched();
26 }
Alexei Starovoitova10423b2016-02-01 22:39:54 -080027}
28
29static int bpf_array_alloc_percpu(struct bpf_array *array)
30{
31 void __percpu *ptr;
32 int i;
33
34 for (i = 0; i < array->map.max_entries; i++) {
35 ptr = __alloc_percpu_gfp(array->elem_size, 8,
36 GFP_USER | __GFP_NOWARN);
37 if (!ptr) {
38 bpf_array_free_percpu(array);
39 return -ENOMEM;
40 }
41 array->pptrs[i] = ptr;
Eric Dumazet32fff232018-02-22 08:33:24 -080042 cond_resched();
Alexei Starovoitova10423b2016-02-01 22:39:54 -080043 }
44
45 return 0;
46}
47
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -080048/* Called from syscall */
Martin KaFai Lau5dc4c4b2018-08-08 01:01:24 -070049int array_map_alloc_check(union bpf_attr *attr)
Jakub Kicinskiad460612018-01-17 19:13:25 -080050{
51 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
52 int numa_node = bpf_map_attr_numa_node(attr);
53
54 /* check sanity of attributes */
55 if (attr->max_entries == 0 || attr->key_size != 4 ||
56 attr->value_size == 0 ||
57 attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
Daniel Borkmann591fe982019-04-09 23:20:05 +020058 !bpf_map_flags_access_ok(attr->map_flags) ||
Jakub Kicinskiad460612018-01-17 19:13:25 -080059 (percpu && numa_node != NUMA_NO_NODE))
60 return -EINVAL;
61
62 if (attr->value_size > KMALLOC_MAX_SIZE)
63 /* if value_size is bigger, the user space won't be able to
64 * access the elements.
65 */
66 return -E2BIG;
67
68 return 0;
69}
70
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -080071static struct bpf_map *array_map_alloc(union bpf_attr *attr)
72{
Alexei Starovoitova10423b2016-02-01 22:39:54 -080073 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
Daniel Borkmann9c2d63b2018-02-16 01:10:29 +010074 int ret, numa_node = bpf_map_attr_numa_node(attr);
Alexei Starovoitovb2157392018-01-07 17:33:02 -080075 u32 elem_size, index_mask, max_entries;
76 bool unpriv = !capable(CAP_SYS_ADMIN);
Daniel Borkmann9c2d63b2018-02-16 01:10:29 +010077 u64 cost, array_size, mask64;
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -080078 struct bpf_array *array;
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -080079
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -080080 elem_size = round_up(attr->value_size, 8);
81
Alexei Starovoitovb2157392018-01-07 17:33:02 -080082 max_entries = attr->max_entries;
Alexei Starovoitovb2157392018-01-07 17:33:02 -080083
Daniel Borkmannbbeb6e42018-01-10 23:25:05 +010084 /* On 32 bit archs roundup_pow_of_two() with max_entries that has
85 * upper most bit set in u32 space is undefined behavior due to
86 * resulting 1U << 32, so do it manually here in u64 space.
87 */
88 mask64 = fls_long(max_entries - 1);
89 mask64 = 1ULL << mask64;
90 mask64 -= 1;
91
92 index_mask = mask64;
93 if (unpriv) {
Alexei Starovoitovb2157392018-01-07 17:33:02 -080094 /* round up array size to nearest power of 2,
95 * since cpu will speculate within index_mask limits
96 */
97 max_entries = index_mask + 1;
Daniel Borkmannbbeb6e42018-01-10 23:25:05 +010098 /* Check for overflows. */
99 if (max_entries < attr->max_entries)
100 return ERR_PTR(-E2BIG);
101 }
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800102
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800103 array_size = sizeof(*array);
104 if (percpu)
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800105 array_size += (u64) max_entries * sizeof(void *);
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800106 else
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800107 array_size += (u64) max_entries * elem_size;
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800108
109 /* make sure there is no u32 overflow later in round_up() */
Daniel Borkmann9c2d63b2018-02-16 01:10:29 +0100110 cost = array_size;
111 if (cost >= U32_MAX - PAGE_SIZE)
Alexei Starovoitovdaaf4272014-11-18 17:32:16 -0800112 return ERR_PTR(-ENOMEM);
Daniel Borkmann9c2d63b2018-02-16 01:10:29 +0100113 if (percpu) {
114 cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
115 if (cost >= U32_MAX - PAGE_SIZE)
116 return ERR_PTR(-ENOMEM);
117 }
118 cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
119
120 ret = bpf_map_precharge_memlock(cost);
121 if (ret < 0)
122 return ERR_PTR(ret);
Alexei Starovoitovdaaf4272014-11-18 17:32:16 -0800123
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800124 /* allocate all map elements and zero-initialize them */
Martin KaFai Lau96eabe72017-08-18 11:28:00 -0700125 array = bpf_map_area_alloc(array_size, numa_node);
Daniel Borkmannd407bd22017-01-18 15:14:17 +0100126 if (!array)
127 return ERR_PTR(-ENOMEM);
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800128 array->index_mask = index_mask;
129 array->map.unpriv_array = unpriv;
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800130
131 /* copy mandatory map attributes */
Jakub Kicinski32852642018-01-17 19:13:26 -0800132 bpf_map_init_from_attr(&array->map, attr);
Daniel Borkmann9c2d63b2018-02-16 01:10:29 +0100133 array->map.pages = cost;
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800134 array->elem_size = elem_size;
135
Daniel Borkmann9c2d63b2018-02-16 01:10:29 +0100136 if (percpu && bpf_array_alloc_percpu(array)) {
Daniel Borkmannd407bd22017-01-18 15:14:17 +0100137 bpf_map_area_free(array);
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800138 return ERR_PTR(-ENOMEM);
139 }
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800140
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800141 return &array->map;
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800142}
143
144/* Called from syscall or from eBPF program */
145static void *array_map_lookup_elem(struct bpf_map *map, void *key)
146{
147 struct bpf_array *array = container_of(map, struct bpf_array, map);
148 u32 index = *(u32 *)key;
149
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800150 if (unlikely(index >= array->map.max_entries))
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800151 return NULL;
152
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800153 return array->value + array->elem_size * (index & array->index_mask);
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800154}
155
Daniel Borkmannd8eca5b2019-04-09 23:20:03 +0200156static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
157 u32 off)
158{
159 struct bpf_array *array = container_of(map, struct bpf_array, map);
160
161 if (map->max_entries != 1)
162 return -ENOTSUPP;
163 if (off >= map->value_size)
164 return -EINVAL;
165
166 *imm = (unsigned long)array->value;
167 return 0;
168}
169
170static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
171 u32 *off)
172{
173 struct bpf_array *array = container_of(map, struct bpf_array, map);
174 u64 base = (unsigned long)array->value;
175 u64 range = array->elem_size;
176
177 if (map->max_entries != 1)
178 return -ENOTSUPP;
179 if (imm < base || imm >= base + range)
180 return -ENOENT;
181
182 *off = imm - base;
183 return 0;
184}
185
Alexei Starovoitov81ed18a2017-03-15 18:26:42 -0700186/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
187static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
188{
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800189 struct bpf_array *array = container_of(map, struct bpf_array, map);
Alexei Starovoitov81ed18a2017-03-15 18:26:42 -0700190 struct bpf_insn *insn = insn_buf;
Martin KaFai Laufad73a12017-03-22 10:00:32 -0700191 u32 elem_size = round_up(map->value_size, 8);
Alexei Starovoitov81ed18a2017-03-15 18:26:42 -0700192 const int ret = BPF_REG_0;
193 const int map_ptr = BPF_REG_1;
194 const int index = BPF_REG_2;
195
196 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
197 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800198 if (map->unpriv_array) {
199 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
200 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
201 } else {
202 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
203 }
Martin KaFai Laufad73a12017-03-22 10:00:32 -0700204
205 if (is_power_of_2(elem_size)) {
Alexei Starovoitov81ed18a2017-03-15 18:26:42 -0700206 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
207 } else {
208 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
209 }
210 *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
211 *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
212 *insn++ = BPF_MOV64_IMM(ret, 0);
213 return insn - insn_buf;
214}
215
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800216/* Called from eBPF program */
217static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
218{
219 struct bpf_array *array = container_of(map, struct bpf_array, map);
220 u32 index = *(u32 *)key;
221
222 if (unlikely(index >= array->map.max_entries))
223 return NULL;
224
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800225 return this_cpu_ptr(array->pptrs[index & array->index_mask]);
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800226}
227
Alexei Starovoitov15a07b32016-02-01 22:39:55 -0800228int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
229{
230 struct bpf_array *array = container_of(map, struct bpf_array, map);
231 u32 index = *(u32 *)key;
232 void __percpu *pptr;
233 int cpu, off = 0;
234 u32 size;
235
236 if (unlikely(index >= array->map.max_entries))
237 return -ENOENT;
238
239 /* per_cpu areas are zero-filled and bpf programs can only
240 * access 'value_size' of them, so copying rounded areas
241 * will not leak any kernel data
242 */
243 size = round_up(map->value_size, 8);
244 rcu_read_lock();
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800245 pptr = array->pptrs[index & array->index_mask];
Alexei Starovoitov15a07b32016-02-01 22:39:55 -0800246 for_each_possible_cpu(cpu) {
247 bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
248 off += size;
249 }
250 rcu_read_unlock();
251 return 0;
252}
253
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800254/* Called from syscall */
255static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
256{
257 struct bpf_array *array = container_of(map, struct bpf_array, map);
Teng Qin8fe45922017-04-24 19:00:37 -0700258 u32 index = key ? *(u32 *)key : U32_MAX;
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800259 u32 *next = (u32 *)next_key;
260
261 if (index >= array->map.max_entries) {
262 *next = 0;
263 return 0;
264 }
265
266 if (index == array->map.max_entries - 1)
267 return -ENOENT;
268
269 *next = index + 1;
270 return 0;
271}
272
273/* Called from syscall or from eBPF program */
274static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
275 u64 map_flags)
276{
277 struct bpf_array *array = container_of(map, struct bpf_array, map);
278 u32 index = *(u32 *)key;
Alexei Starovoitov96049f32019-01-31 15:40:09 -0800279 char *val;
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800280
Alexei Starovoitov96049f32019-01-31 15:40:09 -0800281 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800282 /* unknown flags */
283 return -EINVAL;
284
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800285 if (unlikely(index >= array->map.max_entries))
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800286 /* all elements were pre-allocated, cannot insert a new one */
287 return -E2BIG;
288
Alexei Starovoitov96049f32019-01-31 15:40:09 -0800289 if (unlikely(map_flags & BPF_NOEXIST))
Alexei Starovoitovdaaf4272014-11-18 17:32:16 -0800290 /* all elements already exist */
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800291 return -EEXIST;
292
Alexei Starovoitov96049f32019-01-31 15:40:09 -0800293 if (unlikely((map_flags & BPF_F_LOCK) &&
294 !map_value_has_spin_lock(map)))
295 return -EINVAL;
296
297 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800298 memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800299 value, map->value_size);
Alexei Starovoitov96049f32019-01-31 15:40:09 -0800300 } else {
301 val = array->value +
302 array->elem_size * (index & array->index_mask);
303 if (map_flags & BPF_F_LOCK)
304 copy_map_value_locked(map, val, value, false);
305 else
306 copy_map_value(map, val, value);
307 }
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800308 return 0;
309}
310
Alexei Starovoitov15a07b32016-02-01 22:39:55 -0800311int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
312 u64 map_flags)
313{
314 struct bpf_array *array = container_of(map, struct bpf_array, map);
315 u32 index = *(u32 *)key;
316 void __percpu *pptr;
317 int cpu, off = 0;
318 u32 size;
319
320 if (unlikely(map_flags > BPF_EXIST))
321 /* unknown flags */
322 return -EINVAL;
323
324 if (unlikely(index >= array->map.max_entries))
325 /* all elements were pre-allocated, cannot insert a new one */
326 return -E2BIG;
327
328 if (unlikely(map_flags == BPF_NOEXIST))
329 /* all elements already exist */
330 return -EEXIST;
331
332 /* the user space will provide round_up(value_size, 8) bytes that
333 * will be copied into per-cpu area. bpf programs can only access
334 * value_size of it. During lookup the same extra bytes will be
335 * returned or zeros which were zero-filled by percpu_alloc,
336 * so no kernel data leaks possible
337 */
338 size = round_up(map->value_size, 8);
339 rcu_read_lock();
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800340 pptr = array->pptrs[index & array->index_mask];
Alexei Starovoitov15a07b32016-02-01 22:39:55 -0800341 for_each_possible_cpu(cpu) {
342 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
343 off += size;
344 }
345 rcu_read_unlock();
346 return 0;
347}
348
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800349/* Called from syscall or from eBPF program */
350static int array_map_delete_elem(struct bpf_map *map, void *key)
351{
352 return -EINVAL;
353}
354
355/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
356static void array_map_free(struct bpf_map *map)
357{
358 struct bpf_array *array = container_of(map, struct bpf_array, map);
359
360 /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
361 * so the programs (can be more than one that used this map) were
362 * disconnected from events. Wait for outstanding programs to complete
363 * and free the array
364 */
365 synchronize_rcu();
366
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800367 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
368 bpf_array_free_percpu(array);
369
Daniel Borkmannd407bd22017-01-18 15:14:17 +0100370 bpf_map_area_free(array);
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800371}
372
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -0700373static void array_map_seq_show_elem(struct bpf_map *map, void *key,
374 struct seq_file *m)
375{
376 void *value;
377
378 rcu_read_lock();
379
380 value = array_map_lookup_elem(map, key);
381 if (!value) {
382 rcu_read_unlock();
383 return;
384 }
385
Daniel Borkmann2824ecb2019-04-09 23:20:10 +0200386 if (map->btf_key_type_id)
387 seq_printf(m, "%u: ", *(u32 *)key);
Martin KaFai Lau9b2cf322018-05-22 14:57:21 -0700388 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -0700389 seq_puts(m, "\n");
390
391 rcu_read_unlock();
392}
393
Yonghong Songc7b27c32018-08-29 14:43:13 -0700394static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
395 struct seq_file *m)
396{
397 struct bpf_array *array = container_of(map, struct bpf_array, map);
398 u32 index = *(u32 *)key;
399 void __percpu *pptr;
400 int cpu;
401
402 rcu_read_lock();
403
404 seq_printf(m, "%u: {\n", *(u32 *)key);
405 pptr = array->pptrs[index & array->index_mask];
406 for_each_possible_cpu(cpu) {
407 seq_printf(m, "\tcpu%d: ", cpu);
408 btf_type_seq_show(map->btf, map->btf_value_type_id,
409 per_cpu_ptr(pptr, cpu), m);
410 seq_puts(m, "\n");
411 }
412 seq_puts(m, "}\n");
413
414 rcu_read_unlock();
415}
416
Daniel Borkmanne8d2bec2018-08-12 01:59:17 +0200417static int array_map_check_btf(const struct bpf_map *map,
Roman Gushchin1b2b2342018-12-10 15:43:00 -0800418 const struct btf *btf,
Daniel Borkmanne8d2bec2018-08-12 01:59:17 +0200419 const struct btf_type *key_type,
420 const struct btf_type *value_type)
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -0700421{
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -0700422 u32 int_data;
423
Daniel Borkmann2824ecb2019-04-09 23:20:10 +0200424 /* One exception for keyless BTF: .bss/.data/.rodata map */
425 if (btf_type_is_void(key_type)) {
426 if (map->map_type != BPF_MAP_TYPE_ARRAY ||
427 map->max_entries != 1)
428 return -EINVAL;
429
430 if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
431 return -EINVAL;
432
433 return 0;
434 }
435
Daniel Borkmanne8d2bec2018-08-12 01:59:17 +0200436 if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -0700437 return -EINVAL;
438
439 int_data = *(u32 *)(key_type + 1);
Daniel Borkmanne8d2bec2018-08-12 01:59:17 +0200440 /* bpf array can only take a u32 key. This check makes sure
441 * that the btf matches the attr used during map_create.
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -0700442 */
Daniel Borkmanne8d2bec2018-08-12 01:59:17 +0200443 if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -0700444 return -EINVAL;
445
446 return 0;
447}
448
Johannes Berg40077e02017-04-11 15:34:58 +0200449const struct bpf_map_ops array_map_ops = {
Jakub Kicinskiad460612018-01-17 19:13:25 -0800450 .map_alloc_check = array_map_alloc_check,
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800451 .map_alloc = array_map_alloc,
452 .map_free = array_map_free,
453 .map_get_next_key = array_map_get_next_key,
454 .map_lookup_elem = array_map_lookup_elem,
455 .map_update_elem = array_map_update_elem,
456 .map_delete_elem = array_map_delete_elem,
Alexei Starovoitov81ed18a2017-03-15 18:26:42 -0700457 .map_gen_lookup = array_map_gen_lookup,
Daniel Borkmannd8eca5b2019-04-09 23:20:03 +0200458 .map_direct_value_addr = array_map_direct_value_addr,
459 .map_direct_value_meta = array_map_direct_value_meta,
Martin KaFai Laua26ca7c2018-04-18 15:56:03 -0700460 .map_seq_show_elem = array_map_seq_show_elem,
461 .map_check_btf = array_map_check_btf,
Alexei Starovoitov28fbcfa2014-11-13 17:36:46 -0800462};
463
Johannes Berg40077e02017-04-11 15:34:58 +0200464const struct bpf_map_ops percpu_array_map_ops = {
Jakub Kicinskiad460612018-01-17 19:13:25 -0800465 .map_alloc_check = array_map_alloc_check,
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800466 .map_alloc = array_map_alloc,
467 .map_free = array_map_free,
468 .map_get_next_key = array_map_get_next_key,
469 .map_lookup_elem = percpu_array_map_lookup_elem,
470 .map_update_elem = array_map_update_elem,
471 .map_delete_elem = array_map_delete_elem,
Yonghong Songc7b27c32018-08-29 14:43:13 -0700472 .map_seq_show_elem = percpu_array_map_seq_show_elem,
Daniel Borkmanne8d2bec2018-08-12 01:59:17 +0200473 .map_check_btf = array_map_check_btf,
Alexei Starovoitova10423b2016-02-01 22:39:54 -0800474};
475
Jakub Kicinskiad460612018-01-17 19:13:25 -0800476static int fd_array_map_alloc_check(union bpf_attr *attr)
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700477{
Wang Nan2a36f0b2015-08-06 07:02:33 +0000478 /* only file descriptors can be stored in this type of map */
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700479 if (attr->value_size != sizeof(u32))
Jakub Kicinskiad460612018-01-17 19:13:25 -0800480 return -EINVAL;
Daniel Borkmann591fe982019-04-09 23:20:05 +0200481 /* Program read-only/write-only not supported for special maps yet. */
482 if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
483 return -EINVAL;
Jakub Kicinskiad460612018-01-17 19:13:25 -0800484 return array_map_alloc_check(attr);
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700485}
486
Wang Nan2a36f0b2015-08-06 07:02:33 +0000487static void fd_array_map_free(struct bpf_map *map)
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700488{
489 struct bpf_array *array = container_of(map, struct bpf_array, map);
490 int i;
491
492 synchronize_rcu();
493
494 /* make sure it's empty */
495 for (i = 0; i < array->map.max_entries; i++)
Wang Nan2a36f0b2015-08-06 07:02:33 +0000496 BUG_ON(array->ptrs[i] != NULL);
Daniel Borkmannd407bd22017-01-18 15:14:17 +0100497
498 bpf_map_area_free(array);
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700499}
500
Wang Nan2a36f0b2015-08-06 07:02:33 +0000501static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700502{
Prashant Bhole3b4a63f2018-10-09 10:04:50 +0900503 return ERR_PTR(-EOPNOTSUPP);
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700504}
505
506/* only called from syscall */
Martin KaFai Lau14dc6f02017-06-27 23:08:34 -0700507int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
508{
509 void **elem, *ptr;
510 int ret = 0;
511
512 if (!map->ops->map_fd_sys_lookup_elem)
513 return -ENOTSUPP;
514
515 rcu_read_lock();
516 elem = array_map_lookup_elem(map, key);
517 if (elem && (ptr = READ_ONCE(*elem)))
518 *value = map->ops->map_fd_sys_lookup_elem(ptr);
519 else
520 ret = -ENOENT;
521 rcu_read_unlock();
522
523 return ret;
524}
525
526/* only called from syscall */
Daniel Borkmannd056a782016-06-15 22:47:13 +0200527int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
528 void *key, void *value, u64 map_flags)
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700529{
530 struct bpf_array *array = container_of(map, struct bpf_array, map);
Wang Nan2a36f0b2015-08-06 07:02:33 +0000531 void *new_ptr, *old_ptr;
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700532 u32 index = *(u32 *)key, ufd;
533
534 if (map_flags != BPF_ANY)
535 return -EINVAL;
536
537 if (index >= array->map.max_entries)
538 return -E2BIG;
539
540 ufd = *(u32 *)value;
Daniel Borkmannd056a782016-06-15 22:47:13 +0200541 new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
Wang Nan2a36f0b2015-08-06 07:02:33 +0000542 if (IS_ERR(new_ptr))
543 return PTR_ERR(new_ptr);
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700544
Wang Nan2a36f0b2015-08-06 07:02:33 +0000545 old_ptr = xchg(array->ptrs + index, new_ptr);
546 if (old_ptr)
547 map->ops->map_fd_put_ptr(old_ptr);
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700548
549 return 0;
550}
551
Wang Nan2a36f0b2015-08-06 07:02:33 +0000552static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700553{
554 struct bpf_array *array = container_of(map, struct bpf_array, map);
Wang Nan2a36f0b2015-08-06 07:02:33 +0000555 void *old_ptr;
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700556 u32 index = *(u32 *)key;
557
558 if (index >= array->map.max_entries)
559 return -E2BIG;
560
Wang Nan2a36f0b2015-08-06 07:02:33 +0000561 old_ptr = xchg(array->ptrs + index, NULL);
562 if (old_ptr) {
563 map->ops->map_fd_put_ptr(old_ptr);
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700564 return 0;
565 } else {
566 return -ENOENT;
567 }
568}
569
Daniel Borkmannd056a782016-06-15 22:47:13 +0200570static void *prog_fd_array_get_ptr(struct bpf_map *map,
571 struct file *map_file, int fd)
Wang Nan2a36f0b2015-08-06 07:02:33 +0000572{
573 struct bpf_array *array = container_of(map, struct bpf_array, map);
574 struct bpf_prog *prog = bpf_prog_get(fd);
Daniel Borkmannd056a782016-06-15 22:47:13 +0200575
Wang Nan2a36f0b2015-08-06 07:02:33 +0000576 if (IS_ERR(prog))
577 return prog;
578
579 if (!bpf_prog_array_compatible(array, prog)) {
580 bpf_prog_put(prog);
581 return ERR_PTR(-EINVAL);
582 }
Daniel Borkmannd056a782016-06-15 22:47:13 +0200583
Wang Nan2a36f0b2015-08-06 07:02:33 +0000584 return prog;
585}
586
/* map_fd_put_ptr() for prog arrays: @ptr is passed to bpf_prog_put(). */
static void prog_fd_array_put_ptr(void *ptr)
{
	struct bpf_prog *prog = ptr;

	bpf_prog_put(prog);
}
591
Martin KaFai Lau14dc6f02017-06-27 23:08:34 -0700592static u32 prog_fd_array_sys_lookup_elem(void *ptr)
593{
594 return ((struct bpf_prog *)ptr)->aux->id;
595}
596
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700597/* decrement refcnt of all bpf_progs that are stored in this map */
John Fastabendba6b8de2018-04-23 15:39:23 -0700598static void bpf_fd_array_map_clear(struct bpf_map *map)
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700599{
600 struct bpf_array *array = container_of(map, struct bpf_array, map);
601 int i;
602
603 for (i = 0; i < array->map.max_entries; i++)
Wang Nan2a36f0b2015-08-06 07:02:33 +0000604 fd_array_map_delete_elem(map, &i);
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700605}
606
Yonghong Songa7c19db2018-09-06 17:26:04 -0700607static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
608 struct seq_file *m)
609{
610 void **elem, *ptr;
611 u32 prog_id;
612
613 rcu_read_lock();
614
615 elem = array_map_lookup_elem(map, key);
616 if (elem) {
617 ptr = READ_ONCE(*elem);
618 if (ptr) {
619 seq_printf(m, "%u: ", *(u32 *)key);
620 prog_id = prog_fd_array_sys_lookup_elem(ptr);
621 btf_type_seq_show(map->btf, map->btf_value_type_id,
622 &prog_id, m);
623 seq_puts(m, "\n");
624 }
625 }
626
627 rcu_read_unlock();
628}
629
Johannes Berg40077e02017-04-11 15:34:58 +0200630const struct bpf_map_ops prog_array_map_ops = {
Jakub Kicinskiad460612018-01-17 19:13:25 -0800631 .map_alloc_check = fd_array_map_alloc_check,
632 .map_alloc = array_map_alloc,
Wang Nan2a36f0b2015-08-06 07:02:33 +0000633 .map_free = fd_array_map_free,
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700634 .map_get_next_key = array_map_get_next_key,
Wang Nan2a36f0b2015-08-06 07:02:33 +0000635 .map_lookup_elem = fd_array_map_lookup_elem,
Wang Nan2a36f0b2015-08-06 07:02:33 +0000636 .map_delete_elem = fd_array_map_delete_elem,
637 .map_fd_get_ptr = prog_fd_array_get_ptr,
638 .map_fd_put_ptr = prog_fd_array_put_ptr,
Martin KaFai Lau14dc6f02017-06-27 23:08:34 -0700639 .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
John Fastabendba6b8de2018-04-23 15:39:23 -0700640 .map_release_uref = bpf_fd_array_map_clear,
Yonghong Songa7c19db2018-09-06 17:26:04 -0700641 .map_seq_show_elem = prog_array_map_seq_show_elem,
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700642};
643
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200644static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
645 struct file *map_file)
Kaixu Xiaea317b22015-08-06 07:02:34 +0000646{
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200647 struct bpf_event_entry *ee;
648
Daniel Borkmann858d68f2016-07-16 01:15:55 +0200649 ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200650 if (ee) {
651 ee->event = perf_file->private_data;
652 ee->perf_file = perf_file;
653 ee->map_file = map_file;
654 }
655
656 return ee;
657}
658
659static void __bpf_event_entry_free(struct rcu_head *rcu)
660{
661 struct bpf_event_entry *ee;
662
663 ee = container_of(rcu, struct bpf_event_entry, rcu);
664 fput(ee->perf_file);
665 kfree(ee);
666}
667
668static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
669{
670 call_rcu(&ee->rcu, __bpf_event_entry_free);
Kaixu Xiaea317b22015-08-06 07:02:34 +0000671}
672
Daniel Borkmannd056a782016-06-15 22:47:13 +0200673static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
674 struct file *map_file, int fd)
Kaixu Xiaea317b22015-08-06 07:02:34 +0000675{
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200676 struct bpf_event_entry *ee;
677 struct perf_event *event;
678 struct file *perf_file;
Alexei Starovoitovf91840a2017-06-02 21:03:52 -0700679 u64 value;
Kaixu Xiaea317b22015-08-06 07:02:34 +0000680
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200681 perf_file = perf_event_get(fd);
682 if (IS_ERR(perf_file))
683 return perf_file;
Alexei Starovoitove03e7ee2016-01-25 20:59:49 -0800684
Alexei Starovoitovf91840a2017-06-02 21:03:52 -0700685 ee = ERR_PTR(-EOPNOTSUPP);
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200686 event = perf_file->private_data;
Yonghong Song97562632017-10-05 09:19:19 -0700687 if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200688 goto err_out;
Kaixu Xiaea317b22015-08-06 07:02:34 +0000689
Alexei Starovoitovf91840a2017-06-02 21:03:52 -0700690 ee = bpf_event_entry_gen(perf_file, map_file);
691 if (ee)
692 return ee;
693 ee = ERR_PTR(-ENOMEM);
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200694err_out:
695 fput(perf_file);
696 return ee;
Kaixu Xiaea317b22015-08-06 07:02:34 +0000697}
698
/* map_fd_put_ptr() for perf event arrays: @ptr is a bpf_event_entry that
 * gets released through bpf_event_entry_free_rcu().
 */
static void perf_event_fd_array_put_ptr(void *ptr)
{
	struct bpf_event_entry *ee = ptr;

	bpf_event_entry_free_rcu(ee);
}
703
704static void perf_event_fd_array_release(struct bpf_map *map,
705 struct file *map_file)
706{
707 struct bpf_array *array = container_of(map, struct bpf_array, map);
708 struct bpf_event_entry *ee;
709 int i;
710
711 rcu_read_lock();
712 for (i = 0; i < array->map.max_entries; i++) {
713 ee = READ_ONCE(array->ptrs[i]);
714 if (ee && ee->map_file == map_file)
715 fd_array_map_delete_elem(map, &i);
716 }
717 rcu_read_unlock();
Kaixu Xiaea317b22015-08-06 07:02:34 +0000718}
719
Johannes Berg40077e02017-04-11 15:34:58 +0200720const struct bpf_map_ops perf_event_array_map_ops = {
Jakub Kicinskiad460612018-01-17 19:13:25 -0800721 .map_alloc_check = fd_array_map_alloc_check,
722 .map_alloc = array_map_alloc,
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200723 .map_free = fd_array_map_free,
Kaixu Xiaea317b22015-08-06 07:02:34 +0000724 .map_get_next_key = array_map_get_next_key,
725 .map_lookup_elem = fd_array_map_lookup_elem,
Kaixu Xiaea317b22015-08-06 07:02:34 +0000726 .map_delete_elem = fd_array_map_delete_elem,
727 .map_fd_get_ptr = perf_event_fd_array_get_ptr,
728 .map_fd_put_ptr = perf_event_fd_array_put_ptr,
Daniel Borkmann3b1efb12016-06-15 22:47:14 +0200729 .map_release = perf_event_fd_array_release,
Daniel Borkmanne8d2bec2018-08-12 01:59:17 +0200730 .map_check_btf = map_check_no_btf,
Kaixu Xiaea317b22015-08-06 07:02:34 +0000731};
732
Sargun Dhillon60d20f92016-08-12 08:56:52 -0700733#ifdef CONFIG_CGROUPS
/* map_fd_get_ptr() for cgroup arrays: return whatever
 * cgroup_get_from_fd() yields for @fd. @map and @map_file are not used.
 */
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	void *cgrp = cgroup_get_from_fd(fd);

	return cgrp;
}
740
/* map_fd_put_ptr() for cgroup arrays: @ptr is passed to cgroup_put(). */
static void cgroup_fd_array_put_ptr(void *ptr)
{
	struct cgroup *cgrp = ptr;

	/* cgroup_put free cgrp after a rcu grace period */
	cgroup_put(cgrp);
}
746
/* map_free() for cgroup arrays: clear every slot first, then release the
 * array through fd_array_map_free().
 */
static void cgroup_fd_array_free(struct bpf_map *map)
{
	/* fd_array_map_free() BUG_ON()s on any slot that is still set */
	bpf_fd_array_map_clear(map);

	fd_array_map_free(map);
}
752
Johannes Berg40077e02017-04-11 15:34:58 +0200753const struct bpf_map_ops cgroup_array_map_ops = {
Jakub Kicinskiad460612018-01-17 19:13:25 -0800754 .map_alloc_check = fd_array_map_alloc_check,
755 .map_alloc = array_map_alloc,
Martin KaFai Lau4ed8ec52016-06-30 10:28:43 -0700756 .map_free = cgroup_fd_array_free,
757 .map_get_next_key = array_map_get_next_key,
758 .map_lookup_elem = fd_array_map_lookup_elem,
759 .map_delete_elem = fd_array_map_delete_elem,
760 .map_fd_get_ptr = cgroup_fd_array_get_ptr,
761 .map_fd_put_ptr = cgroup_fd_array_put_ptr,
Daniel Borkmanne8d2bec2018-08-12 01:59:17 +0200762 .map_check_btf = map_check_no_btf,
Martin KaFai Lau4ed8ec52016-06-30 10:28:43 -0700763};
Martin KaFai Lau4ed8ec52016-06-30 10:28:43 -0700764#endif
Martin KaFai Lau56f668d2017-03-22 10:00:33 -0700765
766static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
767{
768 struct bpf_map *map, *inner_map_meta;
769
770 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
771 if (IS_ERR(inner_map_meta))
772 return inner_map_meta;
773
Jakub Kicinskiad460612018-01-17 19:13:25 -0800774 map = array_map_alloc(attr);
Martin KaFai Lau56f668d2017-03-22 10:00:33 -0700775 if (IS_ERR(map)) {
776 bpf_map_meta_free(inner_map_meta);
777 return map;
778 }
779
780 map->inner_map_meta = inner_map_meta;
781
782 return map;
783}
784
785static void array_of_map_free(struct bpf_map *map)
786{
787 /* map->inner_map_meta is only accessed by syscall which
788 * is protected by fdget/fdput.
789 */
790 bpf_map_meta_free(map->inner_map_meta);
791 bpf_fd_array_map_clear(map);
792 fd_array_map_free(map);
793}
794
795static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
796{
797 struct bpf_map **inner_map = array_map_lookup_elem(map, key);
798
799 if (!inner_map)
800 return NULL;
801
802 return READ_ONCE(*inner_map);
803}
804
Daniel Borkmann7b0c2a02017-08-19 03:12:46 +0200805static u32 array_of_map_gen_lookup(struct bpf_map *map,
806 struct bpf_insn *insn_buf)
807{
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800808 struct bpf_array *array = container_of(map, struct bpf_array, map);
Daniel Borkmann7b0c2a02017-08-19 03:12:46 +0200809 u32 elem_size = round_up(map->value_size, 8);
810 struct bpf_insn *insn = insn_buf;
811 const int ret = BPF_REG_0;
812 const int map_ptr = BPF_REG_1;
813 const int index = BPF_REG_2;
814
815 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
816 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
Alexei Starovoitovb2157392018-01-07 17:33:02 -0800817 if (map->unpriv_array) {
818 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
819 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
820 } else {
821 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
822 }
Daniel Borkmann7b0c2a02017-08-19 03:12:46 +0200823 if (is_power_of_2(elem_size))
824 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
825 else
826 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
827 *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
828 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
829 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
830 *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
831 *insn++ = BPF_MOV64_IMM(ret, 0);
832
833 return insn - insn_buf;
834}
835
Johannes Berg40077e02017-04-11 15:34:58 +0200836const struct bpf_map_ops array_of_maps_map_ops = {
Jakub Kicinskiad460612018-01-17 19:13:25 -0800837 .map_alloc_check = fd_array_map_alloc_check,
Martin KaFai Lau56f668d2017-03-22 10:00:33 -0700838 .map_alloc = array_of_map_alloc,
839 .map_free = array_of_map_free,
840 .map_get_next_key = array_map_get_next_key,
841 .map_lookup_elem = array_of_map_lookup_elem,
842 .map_delete_elem = fd_array_map_delete_elem,
843 .map_fd_get_ptr = bpf_map_fd_get_ptr,
844 .map_fd_put_ptr = bpf_map_fd_put_ptr,
Martin KaFai Lau14dc6f02017-06-27 23:08:34 -0700845 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
Daniel Borkmann7b0c2a02017-08-19 03:12:46 +0200846 .map_gen_lookup = array_of_map_gen_lookup,
Daniel Borkmanne8d2bec2018-08-12 01:59:17 +0200847 .map_check_btf = map_check_no_btf,
Martin KaFai Lau56f668d2017-03-22 10:00:33 -0700848};