bpf: add helper inlining infra and optimize map_array lookup

Optimize bpf_call -> bpf_map_lookup_elem() -> array_map_lookup_elem()
into a sequence of bpf instructions.
When JIT is on, the sequence of bpf instructions becomes a sequence
of native cpu instructions with significantly faster performance
than an indirect call and two functions' prologue/epilogue.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 909fc03..da8c64c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -35,6 +35,7 @@ struct bpf_map_ops {
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
 				int fd);
 	void (*map_fd_put_ptr)(void *ptr);
+	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
 };
 
 struct bpf_map {