/*
 * Copyright(c) 2017 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/magic.h>
#include <linux/cdev.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/fs.h>

static int nr_dax = CONFIG_NR_DEV_DAX;
module_param(nr_dax, int, S_IRUGO);
MODULE_PARM_DESC(nr_dax, "max number of dax device instances");

static dev_t dax_devt;
DEFINE_STATIC_SRCU(dax_srcu);
static struct vfsmount *dax_mnt;
static DEFINE_IDA(dax_minor_ida);
static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly;

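/*
 * Hash table mapping a driver-supplied host name to its dax_device.
 * Sized so that the array of bucket heads occupies exactly one page.
 */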
#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
static struct hlist_head dax_host_list[DAX_HASH_SIZE];
static DEFINE_SPINLOCK(dax_host_lock);

int dax_read_lock(void)
{
	return srcu_read_lock(&dax_srcu);
}
EXPORT_SYMBOL_GPL(dax_read_lock);

void dax_read_unlock(int id)
{
	srcu_read_unlock(&dax_srcu, id);
}
EXPORT_SYMBOL_GPL(dax_read_unlock);

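/*
 * A sketch of the expected calling convention (not verbatim from any
 * in-tree consumer): operations against a dax_device are bracketed by
 * the SRCU read lock, and dax_alive() is checked inside the critical
 * section so that kill_dax() can wait out in-flight users:
 *
 *	id = dax_read_lock();
 *	if (dax_alive(dax_dev))
 *		... operate on dax_dev ...
 *	dax_read_unlock(id);
 */
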
/**
 * struct dax_device - anchor object for dax services
 * @list: entry in the global dax_host_list, for lookups by @host
 * @inode: core vfs
 * @cdev: optional character interface for "device dax"
 * @host: optional name for lookups where the device path is not available
 * @private: dax driver private data
 * @alive: !alive + rcu grace period == no new operations / mappings
 */
struct dax_device {
	struct hlist_node list;
	struct inode inode;
	struct cdev cdev;
	const char *host;
	void *private;
	bool alive;
};

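/*
 * dax_alive() must be called while holding dax_read_lock(); the lockdep
 * assertion below checks that the SRCU read side is actually held.
 */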
bool dax_alive(struct dax_device *dax_dev)
{
	lockdep_assert_held(&dax_srcu);
	return dax_dev->alive;
}
EXPORT_SYMBOL_GPL(dax_alive);

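/*
 * Hash a host name into a dax_host_list bucket.  hashlen_string() packs
 * the string's hash and length into a single u64; the "DAX" salt keeps
 * these hashes distinct from other users of the string-hash helpers.
 */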
static int dax_host_hash(const char *host)
{
	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
}

/*
 * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring
 * that any fault handlers or operations that might have seen
 * dax_alive() have completed.  Any operations that start after
 * synchronize_srcu() has run will abort upon seeing !dax_alive().
 */
void kill_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;

	dax_dev->alive = false;

	synchronize_srcu(&dax_srcu);

	spin_lock(&dax_host_lock);
	hlist_del_init(&dax_dev->list);
	spin_unlock(&dax_host_lock);

	dax_dev->private = NULL;
}
EXPORT_SYMBOL_GPL(kill_dax);

static struct inode *dax_alloc_inode(struct super_block *sb)
{
	struct dax_device *dax_dev;

	dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
	if (!dax_dev)
		return NULL;

	return &dax_dev->inode;
}

static struct dax_device *to_dax_dev(struct inode *inode)
{
	return container_of(inode, struct dax_device, inode);
}

static void dax_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct dax_device *dax_dev = to_dax_dev(inode);

	kfree(dax_dev->host);
	dax_dev->host = NULL;
	ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
	kmem_cache_free(dax_cache, dax_dev);
}

static void dax_destroy_inode(struct inode *inode)
{
	struct dax_device *dax_dev = to_dax_dev(inode);

	WARN_ONCE(dax_dev->alive,
			"kill_dax() must be called before final iput()\n");
	call_rcu(&inode->i_rcu, dax_i_callback);
}

static const struct super_operations dax_sops = {
	.statfs = simple_statfs,
	.alloc_inode = dax_alloc_inode,
	.destroy_inode = dax_destroy_inode,
	.drop_inode = generic_delete_inode,
};

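/*
 * The "dax" filesystem is a kernel-internal pseudo-fs: it is never
 * mounted by userspace and exists only to provide anonymous inodes
 * that back dax character devices.
 */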
static struct dentry *dax_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
}

static struct file_system_type dax_fs_type = {
	.name = "dax",
	.mount = dax_mount,
	.kill_sb = kill_anon_super,
};

static int dax_test(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	return inode->i_rdev == devt;
}

static int dax_set(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	inode->i_rdev = devt;
	return 0;
}

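/*
 * Find the inode for @devt in the dax pseudo-fs, allocating and
 * initializing it on first use.  dax_test()/dax_set() above give
 * iget5_locked() its match and initialization callbacks.
 */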
static struct dax_device *dax_dev_get(dev_t devt)
{
	struct dax_device *dax_dev;
	struct inode *inode;

	inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
			dax_test, dax_set, &devt);

	if (!inode)
		return NULL;

	dax_dev = to_dax_dev(inode);
	if (inode->i_state & I_NEW) {
		dax_dev->alive = true;
		inode->i_cdev = &dax_dev->cdev;
		inode->i_mode = S_IFCHR;
		inode->i_flags = S_DAX;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		unlock_new_inode(inode);
	}

	return dax_dev;
}

static void dax_add_host(struct dax_device *dax_dev, const char *host)
{
	int hash;

	/*
	 * Unconditionally init dax_dev since it's coming from a
	 * non-zeroed slab cache
	 */
	INIT_HLIST_NODE(&dax_dev->list);
	dax_dev->host = host;
	if (!host)
		return;

	hash = dax_host_hash(host);
	spin_lock(&dax_host_lock);
	hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
	spin_unlock(&dax_host_lock);
}

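/*
 * Allocate a dax_device: duplicate the optional @__host name, reserve a
 * chardev minor, then pick up (or create) the backing inode.  The error
 * paths unwind in reverse order of acquisition.
 */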
struct dax_device *alloc_dax(void *private, const char *__host)
{
	struct dax_device *dax_dev;
	const char *host;
	dev_t devt;
	int minor;

	host = kstrdup(__host, GFP_KERNEL);
	if (__host && !host)
		return NULL;

	minor = ida_simple_get(&dax_minor_ida, 0, nr_dax, GFP_KERNEL);
	if (minor < 0)
		goto err_minor;

	devt = MKDEV(MAJOR(dax_devt), minor);
	dax_dev = dax_dev_get(devt);
	if (!dax_dev)
		goto err_dev;

	dax_add_host(dax_dev, host);
	dax_dev->private = private;
	return dax_dev;

 err_dev:
	ida_simple_remove(&dax_minor_ida, minor);
 err_minor:
	kfree(host);
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_dax);

void put_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;
	iput(&dax_dev->inode);
}
EXPORT_SYMBOL_GPL(put_dax);

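/*
 * Typical driver lifecycle (a sketch, not verbatim from any caller):
 *
 *	dax_dev = alloc_dax(private_data, host_name);
 *	if (!dax_dev)
 *		return -ENOMEM;
 *	...
 *	kill_dax(dax_dev);	- stop new ops, wait for in-flight ones
 *	put_dax(dax_dev);	- drop the pinning inode reference
 */
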
/**
 * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
 * @host: alternate name for the device registered by a dax driver
 */
struct dax_device *dax_get_by_host(const char *host)
{
	struct dax_device *dax_dev, *found = NULL;
	int hash, id;

	if (!host)
		return NULL;

	hash = dax_host_hash(host);

	id = dax_read_lock();
	spin_lock(&dax_host_lock);
	hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
		if (!dax_alive(dax_dev)
				|| strcmp(host, dax_dev->host) != 0)
			continue;

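		/*
		 * igrab() returns NULL once the inode has entered
		 * teardown, so a racing final iput() cannot hand back
		 * a dying device here.
		 */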
		if (igrab(&dax_dev->inode))
			found = dax_dev;
		break;
	}
	spin_unlock(&dax_host_lock);
	dax_read_unlock(id);

	return found;
}
EXPORT_SYMBOL_GPL(dax_get_by_host);

/**
 * inode_dax: convert a public inode into its dax_dev
 * @inode: An inode with i_cdev pointing to a dax_dev
 *
 * Note this is not equivalent to to_dax_dev() which is for private
 * internal use where we know the inode filesystem type == dax_fs_type.
 */
struct dax_device *inode_dax(struct inode *inode)
{
	struct cdev *cdev = inode->i_cdev;

	return container_of(cdev, struct dax_device, cdev);
}
EXPORT_SYMBOL_GPL(inode_dax);

struct inode *dax_inode(struct dax_device *dax_dev)
{
	return &dax_dev->inode;
}
EXPORT_SYMBOL_GPL(dax_inode);

void *dax_get_private(struct dax_device *dax_dev)
{
	return dax_dev->private;
}
EXPORT_SYMBOL_GPL(dax_get_private);

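/*
 * Slab constructor: a struct inode embeds locks and list heads that
 * must be initialized exactly once per slab object, not on every
 * allocation; per-instance state is (re)set in dax_dev_get().
 */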
static void init_once(void *_dax_dev)
{
	struct dax_device *dax_dev = _dax_dev;
	struct inode *inode = &dax_dev->inode;

	inode_init_once(inode);
}

static int __dax_fs_init(void)
{
	int rc;

	dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
			(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
			 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
			init_once);
	if (!dax_cache)
		return -ENOMEM;

	rc = register_filesystem(&dax_fs_type);
	if (rc)
		goto err_register_fs;

	dax_mnt = kern_mount(&dax_fs_type);
	if (IS_ERR(dax_mnt)) {
		rc = PTR_ERR(dax_mnt);
		goto err_mount;
	}
	dax_superblock = dax_mnt->mnt_sb;

	return 0;

 err_mount:
	unregister_filesystem(&dax_fs_type);
 err_register_fs:
	kmem_cache_destroy(dax_cache);

	return rc;
}

static void __dax_fs_exit(void)
{
	kern_unmount(dax_mnt);
	unregister_filesystem(&dax_fs_type);
	kmem_cache_destroy(dax_cache);
}

static int __init dax_fs_init(void)
{
	int rc;

	rc = __dax_fs_init();
	if (rc)
		return rc;

	nr_dax = max(nr_dax, 256);
	rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
	if (rc)
		__dax_fs_exit();
	return rc;
}

static void __exit dax_fs_exit(void)
{
	unregister_chrdev_region(dax_devt, nr_dax);
	ida_destroy(&dax_minor_ida);
	__dax_fs_exit();
}

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
subsys_initcall(dax_fs_init);
module_exit(dax_fs_exit);