blob: 2831ba1e71c1116482e1342264f5bbef993629fa [file] [log] [blame]
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */
Daniel Borkmann738cbe72014-09-08 08:04:47 +020023
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -070024#include <linux/filter.h>
25#include <linux/skbuff.h>
Daniel Borkmann60a3b222014-09-02 22:53:44 +020026#include <linux/vmalloc.h>
Daniel Borkmann738cbe72014-09-08 08:04:47 +020027#include <linux/random.h>
28#include <linux/moduleloader.h>
Alexei Starovoitov09756af2014-09-26 00:17:00 -070029#include <linux/bpf.h>
Josh Poimboeuf39853cc2016-02-28 22:22:37 -060030#include <linux/frame.h>
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -070031
Daniel Borkmann3324b582015-05-29 23:23:07 +020032#include <asm/unaligned.h>
33
/* Fixed interpreter registers. Each name expands to one slot of a
 * 'regs' array, which therefore has to be in scope at the point of use.
 */
#define BPF_R0	regs[BPF_REG_0]
#define BPF_R1	regs[BPF_REG_1]
#define BPF_R2	regs[BPF_REG_2]
#define BPF_R3	regs[BPF_REG_3]
#define BPF_R4	regs[BPF_REG_4]
#define BPF_R5	regs[BPF_REG_5]
#define BPF_R6	regs[BPF_REG_6]
#define BPF_R7	regs[BPF_REG_7]
#define BPF_R8	regs[BPF_REG_8]
#define BPF_R9	regs[BPF_REG_9]
#define BPF_R10	regs[BPF_REG_10]

/* Operand and role based aliases. DST, SRC and IMM additionally need an
 * 'insn' pointer in scope, as they read fields of the instruction it
 * points to.
 */
#define DST	regs[insn->dst_reg]
#define SRC	regs[insn->src_reg]
#define FP	regs[BPF_REG_FP]
#define ARG1	regs[BPF_REG_ARG1]
#define CTX	regs[BPF_REG_CTX]
#define IMM	insn->imm
54
55/* No hurry in this branch
56 *
57 * Exported for the bpf jit load helper.
58 */
59void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
60{
61 u8 *ptr = NULL;
62
63 if (k >= SKF_NET_OFF)
64 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
65 else if (k >= SKF_LL_OFF)
66 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
Daniel Borkmann3324b582015-05-29 23:23:07 +020067
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -070068 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
69 return ptr;
70
71 return NULL;
72}
73
Daniel Borkmann60a3b222014-09-02 22:53:44 +020074struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
75{
76 gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
77 gfp_extra_flags;
Alexei Starovoitov09756af2014-09-26 00:17:00 -070078 struct bpf_prog_aux *aux;
Daniel Borkmann60a3b222014-09-02 22:53:44 +020079 struct bpf_prog *fp;
80
81 size = round_up(size, PAGE_SIZE);
82 fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
83 if (fp == NULL)
84 return NULL;
85
Daniel Borkmanna91263d2015-09-30 01:41:50 +020086 kmemcheck_annotate_bitfield(fp, meta);
87
Alexei Starovoitov09756af2014-09-26 00:17:00 -070088 aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
89 if (aux == NULL) {
Daniel Borkmann60a3b222014-09-02 22:53:44 +020090 vfree(fp);
91 return NULL;
92 }
93
94 fp->pages = size / PAGE_SIZE;
Alexei Starovoitov09756af2014-09-26 00:17:00 -070095 fp->aux = aux;
Daniel Borkmanne9d8afa2015-10-29 14:58:08 +010096 fp->aux->prog = fp;
Daniel Borkmann60a3b222014-09-02 22:53:44 +020097
98 return fp;
99}
100EXPORT_SYMBOL_GPL(bpf_prog_alloc);
101
102struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
103 gfp_t gfp_extra_flags)
104{
105 gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
106 gfp_extra_flags;
107 struct bpf_prog *fp;
Daniel Borkmann5ccb0712016-12-18 01:52:58 +0100108 u32 pages, delta;
109 int ret;
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200110
111 BUG_ON(fp_old == NULL);
112
113 size = round_up(size, PAGE_SIZE);
Daniel Borkmann5ccb0712016-12-18 01:52:58 +0100114 pages = size / PAGE_SIZE;
115 if (pages <= fp_old->pages)
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200116 return fp_old;
117
Daniel Borkmann5ccb0712016-12-18 01:52:58 +0100118 delta = pages - fp_old->pages;
119 ret = __bpf_prog_charge(fp_old->aux->user, delta);
120 if (ret)
121 return NULL;
122
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200123 fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
Daniel Borkmann5ccb0712016-12-18 01:52:58 +0100124 if (fp == NULL) {
125 __bpf_prog_uncharge(fp_old->aux->user, delta);
126 } else {
Daniel Borkmanna91263d2015-09-30 01:41:50 +0200127 kmemcheck_annotate_bitfield(fp, meta);
128
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200129 memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
Daniel Borkmann5ccb0712016-12-18 01:52:58 +0100130 fp->pages = pages;
Daniel Borkmanne9d8afa2015-10-29 14:58:08 +0100131 fp->aux->prog = fp;
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200132
Alexei Starovoitov09756af2014-09-26 00:17:00 -0700133 /* We keep fp->aux from fp_old around in the new
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200134 * reallocated structure.
135 */
Alexei Starovoitov09756af2014-09-26 00:17:00 -0700136 fp_old->aux = NULL;
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200137 __bpf_prog_free(fp_old);
138 }
139
140 return fp;
141}
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200142
143void __bpf_prog_free(struct bpf_prog *fp)
144{
Alexei Starovoitov09756af2014-09-26 00:17:00 -0700145 kfree(fp->aux);
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200146 vfree(fp);
147}
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200148
Daniel Borkmannf1f77142017-01-13 23:38:15 +0100149int bpf_prog_calc_tag(struct bpf_prog *fp)
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100150{
151 const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
Daniel Borkmannf1f77142017-01-13 23:38:15 +0100152 u32 raw_size = bpf_prog_tag_scratch_size(fp);
153 u32 digest[SHA_DIGEST_WORDS];
Daniel Borkmannaafe6ae2016-12-18 01:52:57 +0100154 u32 ws[SHA_WORKSPACE_WORDS];
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100155 u32 i, bsize, psize, blocks;
Daniel Borkmannaafe6ae2016-12-18 01:52:57 +0100156 struct bpf_insn *dst;
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100157 bool was_ld_map;
Daniel Borkmannaafe6ae2016-12-18 01:52:57 +0100158 u8 *raw, *todo;
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100159 __be32 *result;
160 __be64 *bits;
161
Daniel Borkmannaafe6ae2016-12-18 01:52:57 +0100162 raw = vmalloc(raw_size);
163 if (!raw)
164 return -ENOMEM;
165
Daniel Borkmannf1f77142017-01-13 23:38:15 +0100166 sha_init(digest);
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100167 memset(ws, 0, sizeof(ws));
168
169 /* We need to take out the map fd for the digest calculation
170 * since they are unstable from user space side.
171 */
Daniel Borkmannaafe6ae2016-12-18 01:52:57 +0100172 dst = (void *)raw;
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100173 for (i = 0, was_ld_map = false; i < fp->len; i++) {
174 dst[i] = fp->insnsi[i];
175 if (!was_ld_map &&
176 dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
177 dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
178 was_ld_map = true;
179 dst[i].imm = 0;
180 } else if (was_ld_map &&
181 dst[i].code == 0 &&
182 dst[i].dst_reg == 0 &&
183 dst[i].src_reg == 0 &&
184 dst[i].off == 0) {
185 was_ld_map = false;
186 dst[i].imm = 0;
187 } else {
188 was_ld_map = false;
189 }
190 }
191
Daniel Borkmannaafe6ae2016-12-18 01:52:57 +0100192 psize = bpf_prog_insn_size(fp);
193 memset(&raw[psize], 0, raw_size - psize);
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100194 raw[psize++] = 0x80;
195
196 bsize = round_up(psize, SHA_MESSAGE_BYTES);
197 blocks = bsize / SHA_MESSAGE_BYTES;
Daniel Borkmannaafe6ae2016-12-18 01:52:57 +0100198 todo = raw;
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100199 if (bsize - psize >= sizeof(__be64)) {
200 bits = (__be64 *)(todo + bsize - sizeof(__be64));
201 } else {
202 bits = (__be64 *)(todo + bsize + bits_offset);
203 blocks++;
204 }
205 *bits = cpu_to_be64((psize - 1) << 3);
206
207 while (blocks--) {
Daniel Borkmannf1f77142017-01-13 23:38:15 +0100208 sha_transform(digest, todo, ws);
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100209 todo += SHA_MESSAGE_BYTES;
210 }
211
Daniel Borkmannf1f77142017-01-13 23:38:15 +0100212 result = (__force __be32 *)digest;
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100213 for (i = 0; i < SHA_DIGEST_WORDS; i++)
Daniel Borkmannf1f77142017-01-13 23:38:15 +0100214 result[i] = cpu_to_be32(digest[i]);
215 memcpy(fp->tag, result, sizeof(fp->tag));
Daniel Borkmannaafe6ae2016-12-18 01:52:57 +0100216
217 vfree(raw);
218 return 0;
Daniel Borkmann7bd509e2016-12-04 23:19:41 +0100219}
220
Daniel Borkmannc237ee52016-05-13 19:08:30 +0200221static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn)
222{
223 return BPF_CLASS(insn->code) == BPF_JMP &&
224 /* Call and Exit are both special jumps with no
225 * target inside the BPF instruction image.
226 */
227 BPF_OP(insn->code) != BPF_CALL &&
228 BPF_OP(insn->code) != BPF_EXIT;
229}
230
231static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
232{
233 struct bpf_insn *insn = prog->insnsi;
234 u32 i, insn_cnt = prog->len;
235
236 for (i = 0; i < insn_cnt; i++, insn++) {
237 if (!bpf_is_jmp_and_has_target(insn))
238 continue;
239
240 /* Adjust offset of jmps if we cross boundaries. */
241 if (i < pos && i + insn->off + 1 > pos)
242 insn->off += delta;
243 else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
244 insn->off -= delta;
245 }
246}
247
248struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
249 const struct bpf_insn *patch, u32 len)
250{
251 u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
252 struct bpf_prog *prog_adj;
253
254 /* Since our patchlet doesn't expand the image, we're done. */
255 if (insn_delta == 0) {
256 memcpy(prog->insnsi + off, patch, sizeof(*patch));
257 return prog;
258 }
259
260 insn_adj_cnt = prog->len + insn_delta;
261
262 /* Several new instructions need to be inserted. Make room
263 * for them. Likely, there's no need for a new allocation as
264 * last page could have large enough tailroom.
265 */
266 prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
267 GFP_USER);
268 if (!prog_adj)
269 return NULL;
270
271 prog_adj->len = insn_adj_cnt;
272
273 /* Patching happens in 3 steps:
274 *
275 * 1) Move over tail of insnsi from next instruction onwards,
276 * so we can patch the single target insn with one or more
277 * new ones (patching is always from 1 to n insns, n > 0).
278 * 2) Inject new instructions at the target location.
279 * 3) Adjust branch offsets if necessary.
280 */
281 insn_rest = insn_adj_cnt - off - len;
282
283 memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1,
284 sizeof(*patch) * insn_rest);
285 memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
286
287 bpf_adj_branches(prog_adj, off, insn_delta);
288
289 return prog_adj;
290}
291
Daniel Borkmannb954d832014-09-10 15:01:02 +0200292#ifdef CONFIG_BPF_JIT
Daniel Borkmann738cbe72014-09-08 08:04:47 +0200293struct bpf_binary_header *
294bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
295 unsigned int alignment,
296 bpf_jit_fill_hole_t bpf_fill_ill_insns)
297{
298 struct bpf_binary_header *hdr;
299 unsigned int size, hole, start;
300
301 /* Most of BPF filters are really small, but if some of them
302 * fill a page, allow at least 128 extra bytes to insert a
303 * random section of illegal instructions.
304 */
305 size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
306 hdr = module_alloc(size);
307 if (hdr == NULL)
308 return NULL;
309
310 /* Fill space with illegal/arch-dep instructions. */
311 bpf_fill_ill_insns(hdr, size);
312
313 hdr->pages = size / PAGE_SIZE;
314 hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
315 PAGE_SIZE - sizeof(*hdr));
Daniel Borkmannb7552e1b2016-05-18 14:14:28 +0200316 start = (get_random_int() % hole) & ~(alignment - 1);
Daniel Borkmann738cbe72014-09-08 08:04:47 +0200317
318 /* Leave a random number of instructions before BPF code. */
319 *image_ptr = &hdr->image[start];
320
321 return hdr;
322}
323
/* Give the memory behind a JIT image header back via module_memfree(). */
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
	module_memfree(hdr);
}
Daniel Borkmann4f3446b2016-05-13 19:08:32 +0200328
329int bpf_jit_harden __read_mostly;
330
331static int bpf_jit_blind_insn(const struct bpf_insn *from,
332 const struct bpf_insn *aux,
333 struct bpf_insn *to_buff)
334{
335 struct bpf_insn *to = to_buff;
Daniel Borkmannb7552e1b2016-05-18 14:14:28 +0200336 u32 imm_rnd = get_random_int();
Daniel Borkmann4f3446b2016-05-13 19:08:32 +0200337 s16 off;
338
339 BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
340 BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
341
342 if (from->imm == 0 &&
343 (from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
344 from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
345 *to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
346 goto out;
347 }
348
349 switch (from->code) {
350 case BPF_ALU | BPF_ADD | BPF_K:
351 case BPF_ALU | BPF_SUB | BPF_K:
352 case BPF_ALU | BPF_AND | BPF_K:
353 case BPF_ALU | BPF_OR | BPF_K:
354 case BPF_ALU | BPF_XOR | BPF_K:
355 case BPF_ALU | BPF_MUL | BPF_K:
356 case BPF_ALU | BPF_MOV | BPF_K:
357 case BPF_ALU | BPF_DIV | BPF_K:
358 case BPF_ALU | BPF_MOD | BPF_K:
359 *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
360 *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
361 *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
362 break;
363
364 case BPF_ALU64 | BPF_ADD | BPF_K:
365 case BPF_ALU64 | BPF_SUB | BPF_K:
366 case BPF_ALU64 | BPF_AND | BPF_K:
367 case BPF_ALU64 | BPF_OR | BPF_K:
368 case BPF_ALU64 | BPF_XOR | BPF_K:
369 case BPF_ALU64 | BPF_MUL | BPF_K:
370 case BPF_ALU64 | BPF_MOV | BPF_K:
371 case BPF_ALU64 | BPF_DIV | BPF_K:
372 case BPF_ALU64 | BPF_MOD | BPF_K:
373 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
374 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
375 *to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX);
376 break;
377
378 case BPF_JMP | BPF_JEQ | BPF_K:
379 case BPF_JMP | BPF_JNE | BPF_K:
380 case BPF_JMP | BPF_JGT | BPF_K:
381 case BPF_JMP | BPF_JGE | BPF_K:
382 case BPF_JMP | BPF_JSGT | BPF_K:
383 case BPF_JMP | BPF_JSGE | BPF_K:
384 case BPF_JMP | BPF_JSET | BPF_K:
385 /* Accommodate for extra offset in case of a backjump. */
386 off = from->off;
387 if (off < 0)
388 off -= 2;
389 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
390 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
391 *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
392 break;
393
394 case BPF_LD | BPF_ABS | BPF_W:
395 case BPF_LD | BPF_ABS | BPF_H:
396 case BPF_LD | BPF_ABS | BPF_B:
397 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
398 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
399 *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
400 break;
401
402 case BPF_LD | BPF_IND | BPF_W:
403 case BPF_LD | BPF_IND | BPF_H:
404 case BPF_LD | BPF_IND | BPF_B:
405 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
406 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
407 *to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg);
408 *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
409 break;
410
411 case BPF_LD | BPF_IMM | BPF_DW:
412 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
413 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
414 *to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
415 *to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
416 break;
417 case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
418 *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
419 *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
420 *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX);
421 break;
422
423 case BPF_ST | BPF_MEM | BPF_DW:
424 case BPF_ST | BPF_MEM | BPF_W:
425 case BPF_ST | BPF_MEM | BPF_H:
426 case BPF_ST | BPF_MEM | BPF_B:
427 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
428 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
429 *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
430 break;
431 }
432out:
433 return to - to_buff;
434}
435
436static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
437 gfp_t gfp_extra_flags)
438{
439 gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
440 gfp_extra_flags;
441 struct bpf_prog *fp;
442
443 fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
444 if (fp != NULL) {
445 kmemcheck_annotate_bitfield(fp, meta);
446
447 /* aux->prog still points to the fp_other one, so
448 * when promoting the clone to the real program,
449 * this still needs to be adapted.
450 */
451 memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE);
452 }
453
454 return fp;
455}
456
457static void bpf_prog_clone_free(struct bpf_prog *fp)
458{
459 /* aux was stolen by the other clone, so we cannot free
460 * it from this path! It will be freed eventually by the
461 * other program on release.
462 *
463 * At this point, we don't need a deferred release since
464 * clone is guaranteed to not be locked.
465 */
466 fp->aux = NULL;
467 __bpf_prog_free(fp);
468}
469
470void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
471{
472 /* We have to repoint aux->prog to self, as we don't
473 * know whether fp here is the clone or the original.
474 */
475 fp->aux->prog = fp;
476 bpf_prog_clone_free(fp_other);
477}
478
479struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
480{
481 struct bpf_insn insn_buff[16], aux[2];
482 struct bpf_prog *clone, *tmp;
483 int insn_delta, insn_cnt;
484 struct bpf_insn *insn;
485 int i, rewritten;
486
487 if (!bpf_jit_blinding_enabled())
488 return prog;
489
490 clone = bpf_prog_clone_create(prog, GFP_USER);
491 if (!clone)
492 return ERR_PTR(-ENOMEM);
493
494 insn_cnt = clone->len;
495 insn = clone->insnsi;
496
497 for (i = 0; i < insn_cnt; i++, insn++) {
498 /* We temporarily need to hold the original ld64 insn
499 * so that we can still access the first part in the
500 * second blinding run.
501 */
502 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
503 insn[1].code == 0)
504 memcpy(aux, insn, sizeof(aux));
505
506 rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
507 if (!rewritten)
508 continue;
509
510 tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
511 if (!tmp) {
512 /* Patching may have repointed aux->prog during
513 * realloc from the original one, so we need to
514 * fix it up here on error.
515 */
516 bpf_jit_prog_release_other(prog, clone);
517 return ERR_PTR(-ENOMEM);
518 }
519
520 clone = tmp;
521 insn_delta = rewritten - 1;
522
523 /* Walk new program and skip insns we just inserted. */
524 insn = clone->insnsi + i + insn_delta;
525 insn_cnt += insn_delta;
526 i += insn_delta;
527 }
528
529 return clone;
530}
Daniel Borkmannb954d832014-09-10 15:01:02 +0200531#endif /* CONFIG_BPF_JIT */
Daniel Borkmann738cbe72014-09-08 08:04:47 +0200532
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700533/* Base function for offset calculation. Needs to go into .text section,
534 * therefore keeping it non-static as well; will also be used by JITs
535 * anyway later on, so do not let the compiler omit it.
536 */
537noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
538{
539 return 0;
540}
Alexei Starovoitov4d9c5c52015-07-20 20:34:19 -0700541EXPORT_SYMBOL_GPL(__bpf_call_base);
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700542
543/**
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -0700544 * __bpf_prog_run - run eBPF program on a given context
545 * @ctx: is the data we are operating on
546 * @insn: is the array of eBPF instructions
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700547 *
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -0700548 * Decode and execute eBPF instructions.
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700549 */
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -0700550static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700551{
552 u64 stack[MAX_BPF_STACK / sizeof(u64)];
553 u64 regs[MAX_BPF_REG], tmp;
554 static const void *jumptable[256] = {
555 [0 ... 255] = &&default_label,
556 /* Now overwrite non-defaults ... */
557 /* 32 bit ALU operations */
558 [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
559 [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
560 [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
561 [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
562 [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
563 [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
564 [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
565 [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
566 [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
567 [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
568 [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
569 [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
570 [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
571 [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
572 [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
573 [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
574 [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
575 [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
576 [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
577 [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
578 [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
579 [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
580 [BPF_ALU | BPF_NEG] = &&ALU_NEG,
581 [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
582 [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
583 /* 64 bit ALU operations */
584 [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
585 [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
586 [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
587 [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
588 [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
589 [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
590 [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
591 [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
592 [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
593 [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
594 [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
595 [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
596 [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
597 [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
598 [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
599 [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
600 [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
601 [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
602 [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
603 [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
604 [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
605 [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
606 [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
607 [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
608 [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
609 /* Call instruction */
610 [BPF_JMP | BPF_CALL] = &&JMP_CALL,
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700611 [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL,
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700612 /* Jumps */
613 [BPF_JMP | BPF_JA] = &&JMP_JA,
614 [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
615 [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
616 [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
617 [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
618 [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
619 [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
620 [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
621 [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
622 [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
623 [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
624 [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
625 [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
626 [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
627 [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
628 /* Program return */
629 [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
630 /* Store instructions */
631 [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
632 [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
633 [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
634 [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
635 [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
636 [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
637 [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
638 [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
639 [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
640 [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
641 /* Load instructions */
642 [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
643 [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
644 [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
645 [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
646 [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
647 [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
648 [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
649 [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
650 [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
651 [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
Alexei Starovoitov02ab6952014-09-04 22:17:17 -0700652 [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW,
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700653 };
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700654 u32 tail_call_cnt = 0;
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700655 void *ptr;
656 int off;
657
658#define CONT ({ insn++; goto select_insn; })
659#define CONT_JMP ({ insn++; goto select_insn; })
660
661 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
662 ARG1 = (u64) (unsigned long) ctx;
663
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700664select_insn:
665 goto *jumptable[insn->code];
666
667 /* ALU */
668#define ALU(OPCODE, OP) \
669 ALU64_##OPCODE##_X: \
670 DST = DST OP SRC; \
671 CONT; \
672 ALU_##OPCODE##_X: \
673 DST = (u32) DST OP (u32) SRC; \
674 CONT; \
675 ALU64_##OPCODE##_K: \
676 DST = DST OP IMM; \
677 CONT; \
678 ALU_##OPCODE##_K: \
679 DST = (u32) DST OP (u32) IMM; \
680 CONT;
681
682 ALU(ADD, +)
683 ALU(SUB, -)
684 ALU(AND, &)
685 ALU(OR, |)
686 ALU(LSH, <<)
687 ALU(RSH, >>)
688 ALU(XOR, ^)
689 ALU(MUL, *)
690#undef ALU
691 ALU_NEG:
692 DST = (u32) -DST;
693 CONT;
694 ALU64_NEG:
695 DST = -DST;
696 CONT;
697 ALU_MOV_X:
698 DST = (u32) SRC;
699 CONT;
700 ALU_MOV_K:
701 DST = (u32) IMM;
702 CONT;
703 ALU64_MOV_X:
704 DST = SRC;
705 CONT;
706 ALU64_MOV_K:
707 DST = IMM;
708 CONT;
Alexei Starovoitov02ab6952014-09-04 22:17:17 -0700709 LD_IMM_DW:
710 DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
711 insn++;
712 CONT;
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700713 ALU64_ARSH_X:
714 (*(s64 *) &DST) >>= SRC;
715 CONT;
716 ALU64_ARSH_K:
717 (*(s64 *) &DST) >>= IMM;
718 CONT;
719 ALU64_MOD_X:
720 if (unlikely(SRC == 0))
721 return 0;
Alexei Starovoitov876a7ae2015-04-27 14:40:37 -0700722 div64_u64_rem(DST, SRC, &tmp);
723 DST = tmp;
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700724 CONT;
725 ALU_MOD_X:
726 if (unlikely(SRC == 0))
727 return 0;
728 tmp = (u32) DST;
729 DST = do_div(tmp, (u32) SRC);
730 CONT;
731 ALU64_MOD_K:
Alexei Starovoitov876a7ae2015-04-27 14:40:37 -0700732 div64_u64_rem(DST, IMM, &tmp);
733 DST = tmp;
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700734 CONT;
735 ALU_MOD_K:
736 tmp = (u32) DST;
737 DST = do_div(tmp, (u32) IMM);
738 CONT;
739 ALU64_DIV_X:
740 if (unlikely(SRC == 0))
741 return 0;
Alexei Starovoitov876a7ae2015-04-27 14:40:37 -0700742 DST = div64_u64(DST, SRC);
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700743 CONT;
744 ALU_DIV_X:
745 if (unlikely(SRC == 0))
746 return 0;
747 tmp = (u32) DST;
748 do_div(tmp, (u32) SRC);
749 DST = (u32) tmp;
750 CONT;
751 ALU64_DIV_K:
Alexei Starovoitov876a7ae2015-04-27 14:40:37 -0700752 DST = div64_u64(DST, IMM);
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700753 CONT;
754 ALU_DIV_K:
755 tmp = (u32) DST;
756 do_div(tmp, (u32) IMM);
757 DST = (u32) tmp;
758 CONT;
759 ALU_END_TO_BE:
760 switch (IMM) {
761 case 16:
762 DST = (__force u16) cpu_to_be16(DST);
763 break;
764 case 32:
765 DST = (__force u32) cpu_to_be32(DST);
766 break;
767 case 64:
768 DST = (__force u64) cpu_to_be64(DST);
769 break;
770 }
771 CONT;
772 ALU_END_TO_LE:
773 switch (IMM) {
774 case 16:
775 DST = (__force u16) cpu_to_le16(DST);
776 break;
777 case 32:
778 DST = (__force u32) cpu_to_le32(DST);
779 break;
780 case 64:
781 DST = (__force u64) cpu_to_le64(DST);
782 break;
783 }
784 CONT;
785
786 /* CALL */
787 JMP_CALL:
788 /* Function call scratches BPF_R1-BPF_R5 registers,
789 * preserves BPF_R6-BPF_R9, and stores return value
790 * into BPF_R0.
791 */
792 BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
793 BPF_R4, BPF_R5);
794 CONT;
795
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700796 JMP_TAIL_CALL: {
797 struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
798 struct bpf_array *array = container_of(map, struct bpf_array, map);
799 struct bpf_prog *prog;
800 u64 index = BPF_R3;
801
802 if (unlikely(index >= array->map.max_entries))
803 goto out;
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700804 if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
805 goto out;
806
807 tail_call_cnt++;
808
Wang Nan2a36f0b2015-08-06 07:02:33 +0000809 prog = READ_ONCE(array->ptrs[index]);
Daniel Borkmann1ca1cc92016-06-28 12:18:23 +0200810 if (!prog)
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700811 goto out;
812
Daniel Borkmannc4675f92015-07-13 20:49:32 +0200813 /* ARG1 at this point is guaranteed to point to CTX from
814 * the verifier side due to the fact that the tail call is
		 * handled like a helper, that is, bpf_tail_call_proto,
816 * where arg1_type is ARG_PTR_TO_CTX.
817 */
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -0700818 insn = prog->insnsi;
819 goto select_insn;
820out:
821 CONT;
822 }
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700823 /* JMP */
824 JMP_JA:
825 insn += insn->off;
826 CONT;
827 JMP_JEQ_X:
828 if (DST == SRC) {
829 insn += insn->off;
830 CONT_JMP;
831 }
832 CONT;
833 JMP_JEQ_K:
834 if (DST == IMM) {
835 insn += insn->off;
836 CONT_JMP;
837 }
838 CONT;
839 JMP_JNE_X:
840 if (DST != SRC) {
841 insn += insn->off;
842 CONT_JMP;
843 }
844 CONT;
845 JMP_JNE_K:
846 if (DST != IMM) {
847 insn += insn->off;
848 CONT_JMP;
849 }
850 CONT;
851 JMP_JGT_X:
852 if (DST > SRC) {
853 insn += insn->off;
854 CONT_JMP;
855 }
856 CONT;
857 JMP_JGT_K:
858 if (DST > IMM) {
859 insn += insn->off;
860 CONT_JMP;
861 }
862 CONT;
863 JMP_JGE_X:
864 if (DST >= SRC) {
865 insn += insn->off;
866 CONT_JMP;
867 }
868 CONT;
869 JMP_JGE_K:
870 if (DST >= IMM) {
871 insn += insn->off;
872 CONT_JMP;
873 }
874 CONT;
875 JMP_JSGT_X:
876 if (((s64) DST) > ((s64) SRC)) {
877 insn += insn->off;
878 CONT_JMP;
879 }
880 CONT;
881 JMP_JSGT_K:
882 if (((s64) DST) > ((s64) IMM)) {
883 insn += insn->off;
884 CONT_JMP;
885 }
886 CONT;
887 JMP_JSGE_X:
888 if (((s64) DST) >= ((s64) SRC)) {
889 insn += insn->off;
890 CONT_JMP;
891 }
892 CONT;
893 JMP_JSGE_K:
894 if (((s64) DST) >= ((s64) IMM)) {
895 insn += insn->off;
896 CONT_JMP;
897 }
898 CONT;
899 JMP_JSET_X:
900 if (DST & SRC) {
901 insn += insn->off;
902 CONT_JMP;
903 }
904 CONT;
905 JMP_JSET_K:
906 if (DST & IMM) {
907 insn += insn->off;
908 CONT_JMP;
909 }
910 CONT;
911 JMP_EXIT:
912 return BPF_R0;
913
914 /* STX and ST and LDX*/
915#define LDST(SIZEOP, SIZE) \
916 STX_MEM_##SIZEOP: \
917 *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
918 CONT; \
919 ST_MEM_##SIZEOP: \
920 *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
921 CONT; \
922 LDX_MEM_##SIZEOP: \
923 DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
924 CONT;
925
926 LDST(B, u8)
927 LDST(H, u16)
928 LDST(W, u32)
929 LDST(DW, u64)
930#undef LDST
931 STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
932 atomic_add((u32) SRC, (atomic_t *)(unsigned long)
933 (DST + insn->off));
934 CONT;
935 STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
936 atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
937 (DST + insn->off));
938 CONT;
939 LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
940 off = IMM;
941load_word:
942 /* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns are
943 * only appearing in the programs where ctx ==
944 * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
Alexei Starovoitov8fb575c2014-07-30 20:34:15 -0700945 * == BPF_R6, bpf_convert_filter() saves it in BPF_R6,
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700946 * internal BPF verifier will check that BPF_R6 ==
947 * ctx.
948 *
949 * BPF_ABS and BPF_IND are wrappers of function calls,
950 * so they scratch BPF_R1-BPF_R5 registers, preserve
951 * BPF_R6-BPF_R9, and store return value into BPF_R0.
952 *
953 * Implicit input:
954 * ctx == skb == BPF_R6 == CTX
955 *
956 * Explicit input:
957 * SRC == any register
958 * IMM == 32-bit immediate
959 *
960 * Output:
961 * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
962 */
963
964 ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
965 if (likely(ptr != NULL)) {
966 BPF_R0 = get_unaligned_be32(ptr);
967 CONT;
968 }
969
970 return 0;
971 LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
972 off = IMM;
973load_half:
974 ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
975 if (likely(ptr != NULL)) {
976 BPF_R0 = get_unaligned_be16(ptr);
977 CONT;
978 }
979
980 return 0;
981 LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
982 off = IMM;
983load_byte:
984 ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
985 if (likely(ptr != NULL)) {
986 BPF_R0 = *(u8 *)ptr;
987 CONT;
988 }
989
990 return 0;
991 LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
992 off = IMM + SRC;
993 goto load_word;
994 LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
995 off = IMM + SRC;
996 goto load_half;
997 LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
998 off = IMM + SRC;
999 goto load_byte;
1000
1001 default_label:
1002 /* If we ever reach this, we have a bug somewhere. */
1003 WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
1004 return 0;
1005}
Josh Poimboeuf39853cc2016-02-28 22:22:37 -06001006STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001007
Daniel Borkmann3324b582015-05-29 23:23:07 +02001008bool bpf_prog_array_compatible(struct bpf_array *array,
1009 const struct bpf_prog *fp)
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001010{
Daniel Borkmann3324b582015-05-29 23:23:07 +02001011 if (!array->owner_prog_type) {
1012 /* There's no owner yet where we could check for
1013 * compatibility.
1014 */
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -07001015 array->owner_prog_type = fp->type;
1016 array->owner_jited = fp->jited;
Daniel Borkmann3324b582015-05-29 23:23:07 +02001017
1018 return true;
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -07001019 }
Daniel Borkmann3324b582015-05-29 23:23:07 +02001020
1021 return array->owner_prog_type == fp->type &&
1022 array->owner_jited == fp->jited;
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -07001023}
1024
Daniel Borkmann3324b582015-05-29 23:23:07 +02001025static int bpf_check_tail_call(const struct bpf_prog *fp)
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -07001026{
1027 struct bpf_prog_aux *aux = fp->aux;
1028 int i;
1029
1030 for (i = 0; i < aux->used_map_cnt; i++) {
Daniel Borkmann3324b582015-05-29 23:23:07 +02001031 struct bpf_map *map = aux->used_maps[i];
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -07001032 struct bpf_array *array;
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -07001033
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -07001034 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
1035 continue;
Daniel Borkmann3324b582015-05-29 23:23:07 +02001036
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -07001037 array = container_of(map, struct bpf_array, map);
1038 if (!bpf_prog_array_compatible(array, fp))
1039 return -EINVAL;
1040 }
1041
1042 return 0;
1043}
1044
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001045/**
Daniel Borkmann3324b582015-05-29 23:23:07 +02001046 * bpf_prog_select_runtime - select exec runtime for BPF program
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -07001047 * @fp: bpf_prog populated with internal BPF program
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +02001048 * @err: pointer to error variable
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001049 *
Daniel Borkmann3324b582015-05-29 23:23:07 +02001050 * Try to JIT eBPF program, if JIT is not available, use interpreter.
1051 * The BPF program will be executed via BPF_PROG_RUN() macro.
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001052 */
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +02001053struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001054{
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -07001055 fp->bpf_func = (void *) __bpf_prog_run;
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001056
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +02001057 /* eBPF JITs can rewrite the program in case constant
1058 * blinding is active. However, in case of error during
1059 * blinding, bpf_int_jit_compile() must always return a
1060 * valid program, which in this case would simply not
1061 * be JITed, but falls back to the interpreter.
1062 */
1063 fp = bpf_int_jit_compile(fp);
Daniel Borkmann60a3b222014-09-02 22:53:44 +02001064 bpf_prog_lock_ro(fp);
Alexei Starovoitov04fd61ab2015-05-19 16:59:03 -07001065
Daniel Borkmann3324b582015-05-29 23:23:07 +02001066 /* The tail call compatibility check can only be done at
1067 * this late stage as we need to determine, if we deal
1068 * with JITed or non JITed program concatenations and not
1069 * all eBPF JITs might immediately support all features.
1070 */
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +02001071 *err = bpf_check_tail_call(fp);
1072
1073 return fp;
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001074}
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -07001075EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001076
Daniel Borkmann60a3b222014-09-02 22:53:44 +02001077static void bpf_prog_free_deferred(struct work_struct *work)
1078{
Alexei Starovoitov09756af2014-09-26 00:17:00 -07001079 struct bpf_prog_aux *aux;
Daniel Borkmann60a3b222014-09-02 22:53:44 +02001080
Alexei Starovoitov09756af2014-09-26 00:17:00 -07001081 aux = container_of(work, struct bpf_prog_aux, work);
1082 bpf_jit_free(aux->prog);
Daniel Borkmann60a3b222014-09-02 22:53:44 +02001083}
1084
1085/* Free internal BPF program */
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -07001086void bpf_prog_free(struct bpf_prog *fp)
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001087{
Alexei Starovoitov09756af2014-09-26 00:17:00 -07001088 struct bpf_prog_aux *aux = fp->aux;
Daniel Borkmann60a3b222014-09-02 22:53:44 +02001089
Alexei Starovoitov09756af2014-09-26 00:17:00 -07001090 INIT_WORK(&aux->work, bpf_prog_free_deferred);
Alexei Starovoitov09756af2014-09-26 00:17:00 -07001091 schedule_work(&aux->work);
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -07001092}
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -07001093EXPORT_SYMBOL_GPL(bpf_prog_free);
Alexei Starovoitovf89b7752014-10-23 18:41:08 -07001094
Daniel Borkmann3ad00402015-10-08 01:20:39 +02001095/* RNG for unpriviledged user space with separated state from prandom_u32(). */
1096static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);
1097
1098void bpf_user_rnd_init_once(void)
1099{
1100 prandom_init_once(&bpf_user_rnd_state);
1101}
1102
Daniel Borkmannf3694e02016-09-09 02:45:31 +02001103BPF_CALL_0(bpf_user_rnd_u32)
Daniel Borkmann3ad00402015-10-08 01:20:39 +02001104{
1105 /* Should someone ever have the rather unwise idea to use some
1106 * of the registers passed into this function, then note that
1107 * this function is called from native eBPF and classic-to-eBPF
1108 * transformations. Register assignments from both sides are
1109 * different, f.e. classic always sets fn(ctx, A, X) here.
1110 */
1111 struct rnd_state *state;
1112 u32 res;
1113
1114 state = &get_cpu_var(bpf_user_rnd_state);
1115 res = prandom_u32_state(state);
Shaohua Lib761fe22016-09-27 08:42:41 -07001116 put_cpu_var(bpf_user_rnd_state);
Daniel Borkmann3ad00402015-10-08 01:20:39 +02001117
1118 return res;
1119}
1120
Daniel Borkmann3ba67da2015-03-05 23:27:51 +01001121/* Weak definitions of helper functions in case we don't have bpf syscall. */
1122const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
1123const struct bpf_func_proto bpf_map_update_elem_proto __weak;
1124const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
1125
Daniel Borkmann03e69b52015-03-14 02:27:16 +01001126const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
Daniel Borkmannc04167c2015-03-14 02:27:17 +01001127const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
Daniel Borkmann2d0e30c2016-10-21 12:46:33 +02001128const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
Daniel Borkmann17ca8cb2015-05-29 23:23:06 +02001129const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
Daniel Borkmannbd570ff2016-04-18 21:01:24 +02001130
Alexei Starovoitovffeedaf2015-06-12 19:39:12 -07001131const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
1132const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
1133const struct bpf_func_proto bpf_get_current_comm_proto __weak;
Daniel Borkmannbd570ff2016-04-18 21:01:24 +02001134
Alexei Starovoitov0756ea32015-06-12 19:39:13 -07001135const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
1136{
1137 return NULL;
1138}
Daniel Borkmann03e69b52015-03-14 02:27:16 +01001139
Daniel Borkmann555c8a82016-07-14 18:08:05 +02001140u64 __weak
1141bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
1142 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
Daniel Borkmannbd570ff2016-04-18 21:01:24 +02001143{
Daniel Borkmann555c8a82016-07-14 18:08:05 +02001144 return -ENOTSUPP;
Daniel Borkmannbd570ff2016-04-18 21:01:24 +02001145}
1146
Daniel Borkmann3324b582015-05-29 23:23:07 +02001147/* Always built-in helper functions. */
1148const struct bpf_func_proto bpf_tail_call_proto = {
1149 .func = NULL,
1150 .gpl_only = false,
1151 .ret_type = RET_VOID,
1152 .arg1_type = ARG_PTR_TO_CTX,
1153 .arg2_type = ARG_CONST_MAP_PTR,
1154 .arg3_type = ARG_ANYTHING,
1155};
1156
Daniel Borkmann93831912017-02-16 22:24:49 +01001157/* Stub for JITs that only support cBPF. eBPF programs are interpreted.
1158 * It is encouraged to implement bpf_int_jit_compile() instead, so that
1159 * eBPF and implicitly also cBPF can get JITed!
1160 */
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +02001161struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
Daniel Borkmann3324b582015-05-29 23:23:07 +02001162{
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +02001163 return prog;
Daniel Borkmann3324b582015-05-29 23:23:07 +02001164}
1165
Daniel Borkmann93831912017-02-16 22:24:49 +01001166/* Stub for JITs that support eBPF. All cBPF code gets transformed into
1167 * eBPF by the kernel and is later compiled by bpf_int_jit_compile().
1168 */
1169void __weak bpf_jit_compile(struct bpf_prog *prog)
1170{
1171}
1172
Martin KaFai Lau17bedab2016-12-07 15:53:11 -08001173bool __weak bpf_helper_changes_pkt_data(void *func)
Alexei Starovoitov969bf052016-05-05 19:49:10 -07001174{
1175 return false;
1176}
1177
Alexei Starovoitovf89b7752014-10-23 18:41:08 -07001178/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
1179 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
1180 */
1181int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
1182 int len)
1183{
1184 return -EFAULT;
1185}
Daniel Borkmanna67edbf2017-01-25 02:28:18 +01001186
1187/* All definitions of tracepoints related to BPF. */
1188#define CREATE_TRACE_POINTS
1189#include <linux/bpf_trace.h>
1190
1191EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
1192
1193EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
1194EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);