blob: c2ec44cf5098af4d247cc22d124625eef32fb471 [file] [log] [blame]
Greg Kroah-Hartmanb2441312017-11-01 15:07:57 +01001/* SPDX-License-Identifier: GPL-2.0 */
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -07002#ifndef _FS_CEPH_LIBCEPH_H
3#define _FS_CEPH_LIBCEPH_H
4
David Howellsa1ce3922012-10-02 18:01:25 +01005#include <linux/ceph/ceph_debug.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -07006
7#include <asm/unaligned.h>
8#include <linux/backing-dev.h>
9#include <linux/completion.h>
10#include <linux/exportfs.h>
Paul Gortmaker187f1882011-11-23 20:12:59 -050011#include <linux/bug.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070012#include <linux/fs.h>
13#include <linux/mempool.h>
14#include <linux/pagemap.h>
15#include <linux/wait.h>
16#include <linux/writeback.h>
17#include <linux/slab.h>
Elena Reshetova06dfa962017-03-17 14:10:27 +020018#include <linux/refcount.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070019
David Howellsa1ce3922012-10-02 18:01:25 +010020#include <linux/ceph/types.h>
21#include <linux/ceph/messenger.h>
22#include <linux/ceph/msgpool.h>
23#include <linux/ceph/mon_client.h>
24#include <linux/ceph/osd_client.h>
25#include <linux/ceph/ceph_fs.h>
Yan, Zheng51e92732016-02-05 15:36:22 +080026#include <linux/ceph/string_table.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070027
/*
 * mount options
 *
 * Bit flags kept in the 'flags' member of struct ceph_options.
 */
#define CEPH_OPT_FSID             (1<<0)
#define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
#define CEPH_OPT_NOCRC            (1<<3) /* no data crc on writes */
#define CEPH_OPT_NOMSGAUTH        (1<<4) /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY      (1<<5) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN        (1<<6) /* don't sign msgs */

/* flags that are set when nothing else is requested */
#define CEPH_OPT_DEFAULT   (CEPH_OPT_TCP_NODELAY)

/*
 * ceph_set_opt(client, opt)  - set CEPH_OPT_<opt> in client->options->flags
 * ceph_test_opt(client, opt) - evaluate to 1 if CEPH_OPT_<opt> is set, else 0
 *
 * @client: pointer to an object with an ->options->flags member
 * @opt:    flag name without the CEPH_OPT_ prefix (e.g. NOSHARE)
 *
 * Both macros expand to a single parenthesized expression with no
 * trailing semicolon, so they can be used as the body of an unbraced
 * if/else or inside a larger expression.  @client is evaluated once.
 */
#define ceph_set_opt(client, opt) \
	((client)->options->flags |= CEPH_OPT_##opt)
#define ceph_test_opt(client, opt) \
	(!!((client)->options->flags & CEPH_OPT_##opt))
45
/*
 * Client options.  struct ceph_client refers to one of these through its
 * ->options pointer; the CEPH_OPT_* bits live in ->flags.
 */
struct ceph_options {
	int flags;				/* CEPH_OPT_* bits */
	struct ceph_fsid fsid;
	struct ceph_entity_addr my_addr;
	unsigned long mount_timeout;		/* jiffies */
	unsigned long osd_idle_ttl;		/* jiffies */
	unsigned long osd_keepalive_timeout;	/* jiffies */
	unsigned long osd_request_timeout;	/* jiffies */

	/*
	 * Any type that can't be simply compared, or doesn't need
	 * to be compared, should go beyond this point;
	 * ceph_compare_options() should be updated accordingly.
	 */

	struct ceph_entity_addr *mon_addr; /* should be the first
					      pointer type of args */
	int num_mon;	/* NOTE(review): presumably the number of entries
			   in mon_addr - confirm against the parser */
	char *name;
	struct ceph_crypto_key *key;
};
67
/*
 * defaults
 *
 * Values built with msecs_to_jiffies() are expressed in jiffies, matching
 * the "jiffies" fields of struct ceph_options.
 */
#define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
#define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0  /* no timeout */

/* monitor client timing; the two unit-less values are plain integers */
#define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
#define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
#define CEPH_MONC_PING_TIMEOUT		msecs_to_jiffies(30 * 1000)
#define CEPH_MONC_HUNT_BACKOFF		2
#define CEPH_MONC_HUNT_MAX_MULT		10

/* upper bounds on message sections (16 * 1024 * 1024 each; presumably bytes) */
#define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN	(16*1024*1024)
#define CEPH_MSG_MAX_DATA_LEN	(16*1024*1024)

#define CEPH_AUTH_NAME_DEFAULT   "guest"
87
/*
 * mount state
 *
 * Enumerators are listed in declaration order and take the values 0..4.
 */
enum {
	CEPH_MOUNT_MOUNTING,
	CEPH_MOUNT_MOUNTED,
	CEPH_MOUNT_UNMOUNTING,
	CEPH_MOUNT_UNMOUNTED,
	CEPH_MOUNT_SHUTDOWN,
};
96
Ilya Dryomova319bf52015-05-15 12:02:17 +030097static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
98{
99 return timeout ?: MAX_SCHEDULE_TIMEOUT;
100}
101
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700102struct ceph_mds_client;
103
/*
 * per client state
 *
 * possibly shared by multiple mount points, if they are
 * mounting the same ceph filesystem/cluster.
 */
struct ceph_client {
	struct ceph_fsid fsid;
	bool have_fsid;

	void *private;		/* opaque pointer; not interpreted in this header */

	struct ceph_options *options;	/* accessed by ceph_set_opt()/ceph_test_opt() */

	struct mutex mount_mutex;      /* serialize mount attempts */
	wait_queue_head_t auth_wq;
	int auth_err;

	/* optional hook taking the client and a message; returns int */
	int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);

	u64 supported_features;
	u64 required_features;

	/* embedded by value: from_msgr() relies on msgr living inside the client */
	struct ceph_messenger msgr;   /* messenger instance */
	struct ceph_mon_client monc;
	struct ceph_osd_client osdc;

#ifdef CONFIG_DEBUG_FS
	/* debugfs entries, only present when CONFIG_DEBUG_FS is enabled */
	struct dentry *debugfs_dir;
	struct dentry *debugfs_monmap;
	struct dentry *debugfs_osdmap;
	struct dentry *debugfs_options;
#endif
};
138
/*
 * Given a pointer to the 'msgr' member embedded in a struct ceph_client,
 * return a pointer to the containing struct ceph_client.
 */
#define from_msgr(ms)	container_of(ms, struct ceph_client, msgr)
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700140
141
/*
 * snapshots
 */

/*
 * A "snap context" is the set of existing snapshots when we
 * write data.  It is used by the OSD to guide its COW behavior.
 *
 * The ceph_snap_context is refcounted, and attached to each dirty
 * page, indicating which context the dirty data belonged to when it
 * was dirtied.
 */
struct ceph_snap_context {
	refcount_t nref;	/* reference count */
	u64 seq;
	u32 num_snaps;		/* NOTE(review): presumably the number of
				   entries in snaps[] - confirm in the
				   allocator */
	u64 snaps[];		/* flexible array member; must stay last */
};
160
/*
 * Snap context helpers, defined outside this header.
 *
 * NOTE(review): judging by the names and by the 'nref' member of
 * struct ceph_snap_context, create presumably allocates room for
 * @snap_count snaps using @gfp_flags, and get/put take and drop a
 * reference - confirm against the implementation.
 */
extern struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
					gfp_t gfp_flags);
extern struct ceph_snap_context *ceph_get_snap_context(
					struct ceph_snap_context *sc);
extern void ceph_put_snap_context(struct ceph_snap_context *sc);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700166
167/*
168 * calculate the number of pages a given length and offset map onto,
169 * if we align the data.
170 */
171static inline int calc_pages_for(u64 off, u64 len)
172{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +0300173 return ((off+len+PAGE_SIZE-1) >> PAGE_SHIFT) -
174 (off >> PAGE_SHIFT);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700175}
176
/*
 * Building blocks for the DEFINE_RB_* generators below.
 *
 * RB_BYVAL(a)      - key expression: use the key field itself
 * RB_BYPTR(a)      - key expression: use the address of the key field
 * RB_CMP3WAY(a, b) - three-way compare: -1 if a < b, 1 if a > b, else 0.
 *                    Arguments may be evaluated more than once, so they
 *                    must be free of side effects.
 */
#define RB_BYVAL(a)      (a)
#define RB_BYPTR(a)      (&(a))
#define RB_CMP3WAY(a, b) ((a) < (b) ? -1 : (a) > (b))
180
/*
 * DEFINE_RB_INSDEL_FUNCS2 - generate static insert_<name>() and
 * erase_<name>() for an rbtree of @type.
 *
 * @name:    suffix for the generated function names
 * @type:    type of the objects stored in the tree
 * @keyfld:  member of @type holding the key
 * @cmpexp:  comparator applied as cmpexp(a, b); result < 0 descends to
 *           the left child, > 0 to the right child
 * @keyexp:  wrapper applied to each key field before it is handed to
 *           @cmpexp (e.g. RB_BYVAL or RB_BYPTR)
 * @nodefld: member of @type that is the embedded struct rb_node
 *
 * insert_<name>() BUG()s if the node is not marked empty on entry or if
 * an entry comparing equal is already in the tree.  erase_<name>()
 * BUG()s if the node is marked empty, and marks it empty again after
 * removing it.
 */
#define DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \
static void insert_##name(struct rb_root *root, type *t)		\
{									\
	struct rb_node **n = &root->rb_node;				\
	struct rb_node *parent = NULL;					\
									\
	BUG_ON(!RB_EMPTY_NODE(&t->nodefld));				\
									\
	while (*n) {							\
		type *cur = rb_entry(*n, type, nodefld);		\
		int cmp;						\
									\
		parent = *n;						\
		cmp = cmpexp(keyexp(t->keyfld), keyexp(cur->keyfld));	\
		if (cmp < 0)						\
			n = &(*n)->rb_left;				\
		else if (cmp > 0)					\
			n = &(*n)->rb_right;				\
		else							\
			BUG();						\
	}								\
									\
	rb_link_node(&t->nodefld, parent, n);				\
	rb_insert_color(&t->nodefld, root);				\
}									\
static void erase_##name(struct rb_root *root, type *t)			\
{									\
	BUG_ON(RB_EMPTY_NODE(&t->nodefld));				\
	rb_erase(&t->nodefld, root);					\
	RB_CLEAR_NODE(&t->nodefld);					\
}
212
/*
 * DEFINE_RB_LOOKUP_FUNC2 - generate static lookup_<name>(root, key).
 *
 * The generated function walks the tree from the root, comparing @key
 * against each entry with cmpexp(key, keyexp(cur->keyfld)), and returns
 * the entry that compares equal, or NULL if there is none.  Note that
 * @key is passed to @cmpexp as is; @keyexp is applied only to the
 * stored key.
 *
 * @lookup_param_type is a parameter and not constructed from (@type,
 * @keyfld) with typeof() because adding const is too unwieldy.
 */
#define DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,	\
			       lookup_param_type, nodefld)		\
static type *lookup_##name(struct rb_root *root, lookup_param_type key)	\
{									\
	struct rb_node *n = root->rb_node;				\
									\
	while (n) {							\
		type *cur = rb_entry(n, type, nodefld);			\
		int cmp;						\
									\
		cmp = cmpexp(key, keyexp(cur->keyfld));			\
		if (cmp < 0)						\
			n = n->rb_left;					\
		else if (cmp > 0)					\
			n = n->rb_right;				\
		else							\
			return cur;					\
	}								\
									\
	return NULL;							\
}
238
/*
 * DEFINE_RB_FUNCS2 - generate insert_<name>(), erase_<name>() and
 * lookup_<name>() in one go by expanding both generators with the same
 * arguments.
 */
#define DEFINE_RB_FUNCS2(name, type, keyfld, cmpexp, keyexp,		\
			 lookup_param_type, nodefld)			\
DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld)	\
DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,		\
		       lookup_param_type, nodefld)
244
/*
 * Shorthands for integer keys.
 *
 * These fix the comparator to RB_CMP3WAY and the key expression to
 * RB_BYVAL, so the key field is compared by value with < and >.
 */
#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, nodefld)

/*
 * The extern declaration of __lookup_<name>_key is referenced here only
 * as the operand of typeof(), to name the type of @keyfld for the
 * lookup function's key parameter.
 */
#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
extern type __lookup_##name##_key;					\
DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL,	\
		       typeof(__lookup_##name##_key.keyfld), nodefld)

/* insert, erase and lookup for an integer key */
#define DEFINE_RB_FUNCS(name, type, keyfld, nodefld)			\
DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)			\
DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)
259
/* kmem caches; declared here, defined elsewhere */
extern struct kmem_cache *ceph_inode_cachep;
extern struct kmem_cache *ceph_cap_cachep;
extern struct kmem_cache *ceph_cap_flush_cachep;
extern struct kmem_cache *ceph_dentry_cachep;
extern struct kmem_cache *ceph_file_cachep;
265
/* ceph_common.c */
extern bool libceph_compatible(void *data);

extern const char *ceph_msg_type_name(int type);
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
extern void *ceph_kvmalloc(size_t size, gfp_t flags);

/*
 * Option handling.  ceph_parse_options() takes a non-const @options
 * string, the device name as a [@dev_name, @dev_name_end) pair, and a
 * @parse_extra_token callback that receives a token together with the
 * caller's @private pointer.
 * NOTE(review): presumably the callback is invoked for tokens libceph
 * does not recognize, and the returned options are released with
 * ceph_destroy_options() - confirm in ceph_common.c.
 */
extern struct ceph_options *ceph_parse_options(char *options,
			      const char *dev_name, const char *dev_name_end,
			      int (*parse_extra_token)(char *c, void *private),
			      void *private);
int ceph_print_client_options(struct seq_file *m, struct ceph_client *client);
extern void ceph_destroy_options(struct ceph_options *opt);
extern int ceph_compare_options(struct ceph_options *new_opt,
				struct ceph_client *client);

/* client lifetime and session setup */
struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
u64 ceph_client_gid(struct ceph_client *client);
extern void ceph_destroy_client(struct ceph_client *client);
extern int __ceph_open_session(struct ceph_client *client,
			       unsigned long started);
extern int ceph_open_session(struct ceph_client *client);
288
/*
 * pagevec.c
 *
 * Helpers operating on a "page vector" (an array of struct page
 * pointers).  The copy helpers take the vector first, followed by the
 * buffer, a byte offset and a length; ceph_zero_page_vector_range()
 * differs in taking (off, len) first and the vector last.
 */
extern void ceph_release_page_vector(struct page **pages, int num_pages);

extern struct page **ceph_get_direct_page_vector(const void __user *data,
						 int num_pages,
						 bool write_page);
extern void ceph_put_page_vector(struct page **pages, int num_pages,
				 bool dirty);
extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
/* the only copy helper here that returns a status (int) */
extern int ceph_copy_user_to_page_vector(struct page **pages,
					 const void __user *data,
					 loff_t off, size_t len);
extern void ceph_copy_to_page_vector(struct page **pages,
				     const void *data,
				     loff_t off, size_t len);
extern void ceph_copy_from_page_vector(struct page **pages,
				       void *data,
				       loff_t off, size_t len);
extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
308
309
#endif /* _FS_CEPH_LIBCEPH_H */