blob: 4e5ae7e8b557407a5a988e55b81268aa590574a7 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright(c) 2017 Intel Corporation. All rights reserved.
 */
5#include <linux/pagemap.h>
6#include <linux/module.h>
7#include <linux/mount.h>
8#include <linux/magic.h>
Dan Williamsef510422017-05-08 10:55:27 -07009#include <linux/genhd.h>
Dan Williams569d0362017-10-14 11:33:32 -070010#include <linux/pfn_t.h>
Dan Williams7b6be842017-04-11 09:49:49 -070011#include <linux/cdev.h>
12#include <linux/hash.h>
13#include <linux/slab.h>
Dan Williams7e026c82017-05-29 12:57:56 -070014#include <linux/uio.h>
Dan Williams6568b082017-01-24 18:44:18 -080015#include <linux/dax.h>
Dan Williams7b6be842017-04-11 09:49:49 -070016#include <linux/fs.h>
Dan Williams51cf7842017-07-12 17:58:21 -070017#include "dax-private.h"
Dan Williams7b6be842017-04-11 09:49:49 -070018
Dan Williams7b6be842017-04-11 09:49:49 -070019static dev_t dax_devt;
20DEFINE_STATIC_SRCU(dax_srcu);
21static struct vfsmount *dax_mnt;
22static DEFINE_IDA(dax_minor_ida);
23static struct kmem_cache *dax_cache __read_mostly;
24static struct super_block *dax_superblock __read_mostly;
25
Dan Williams72058002017-04-19 15:14:31 -070026#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
27static struct hlist_head dax_host_list[DAX_HASH_SIZE];
28static DEFINE_SPINLOCK(dax_host_lock);
29
Dan Williams7b6be842017-04-11 09:49:49 -070030int dax_read_lock(void)
31{
32 return srcu_read_lock(&dax_srcu);
33}
34EXPORT_SYMBOL_GPL(dax_read_lock);
35
36void dax_read_unlock(int id)
37{
38 srcu_read_unlock(&dax_srcu, id);
39}
40EXPORT_SYMBOL_GPL(dax_read_unlock);
41
Dan Williams9d109082017-05-13 16:18:21 -070042#ifdef CONFIG_BLOCK
Dan Williams78f35472017-08-30 09:16:38 -070043#include <linux/blkdev.h>
44
Dan Williamsef510422017-05-08 10:55:27 -070045int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
46 pgoff_t *pgoff)
47{
48 phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
49
50 if (pgoff)
51 *pgoff = PHYS_PFN(phys_off);
52 if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
53 return -EINVAL;
54 return 0;
55}
56EXPORT_SYMBOL(bdev_dax_pgoff);
57
Dan Williams26f2f4d2017-09-03 10:17:53 -070058#if IS_ENABLED(CONFIG_FS_DAX)
Dan Williams78f35472017-08-30 09:16:38 -070059struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
60{
61 if (!blk_queue_dax(bdev->bd_queue))
62 return NULL;
63 return fs_dax_get_by_host(bdev->bd_disk->disk_name);
64}
65EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
Dan Williams26f2f4d2017-09-03 10:17:53 -070066#endif
Dan Williams78f35472017-08-30 09:16:38 -070067
Dan Williams7bf7eac2019-05-16 13:26:29 -070068bool __generic_fsdax_supported(struct dax_device *dax_dev,
69 struct block_device *bdev, int blocksize, sector_t start,
70 sector_t sectors)
Dan Williamsef510422017-05-08 10:55:27 -070071{
Dan Williamse76384882018-05-16 11:46:08 -070072 bool dax_enabled = false;
Dan Williamsad428cd2019-02-20 21:12:50 -080073 pgoff_t pgoff, pgoff_end;
Darrick J. Wongba23cba2018-05-30 13:03:45 -070074 char buf[BDEVNAME_SIZE];
Dan Williamsad428cd2019-02-20 21:12:50 -080075 void *kaddr, *end_kaddr;
76 pfn_t pfn, end_pfn;
77 sector_t last_page;
78 long len, len2;
79 int err, id;
Dan Williamsef510422017-05-08 10:55:27 -070080
81 if (blocksize != PAGE_SIZE) {
Darrick J. Wongba23cba2018-05-30 13:03:45 -070082 pr_debug("%s: error: unsupported blocksize for dax\n",
83 bdevname(bdev, buf));
Dave Jiang80660f22018-05-30 13:03:46 -070084 return false;
Dan Williamsef510422017-05-08 10:55:27 -070085 }
86
Dan Williams7bf7eac2019-05-16 13:26:29 -070087 err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
Dan Williamsef510422017-05-08 10:55:27 -070088 if (err) {
Darrick J. Wongba23cba2018-05-30 13:03:45 -070089 pr_debug("%s: error: unaligned partition for dax\n",
90 bdevname(bdev, buf));
Dave Jiang80660f22018-05-30 13:03:46 -070091 return false;
Dan Williamsef510422017-05-08 10:55:27 -070092 }
93
Dan Williams7bf7eac2019-05-16 13:26:29 -070094 last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
Dan Williamsad428cd2019-02-20 21:12:50 -080095 err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
96 if (err) {
97 pr_debug("%s: error: unaligned partition for dax\n",
98 bdevname(bdev, buf));
99 return false;
100 }
101
Dan Williamsef510422017-05-08 10:55:27 -0700102 id = dax_read_lock();
Dan Williamsad428cd2019-02-20 21:12:50 -0800103 len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
104 len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
Dan Williamsef510422017-05-08 10:55:27 -0700105 dax_read_unlock(id);
106
Dan Williamsad428cd2019-02-20 21:12:50 -0800107 if (len < 1 || len2 < 1) {
Darrick J. Wongba23cba2018-05-30 13:03:45 -0700108 pr_debug("%s: error: dax access failed (%ld)\n",
Dan Williamsad428cd2019-02-20 21:12:50 -0800109 bdevname(bdev, buf), len < 1 ? len : len2);
Dave Jiang80660f22018-05-30 13:03:46 -0700110 return false;
Dan Williamsef510422017-05-08 10:55:27 -0700111 }
112
Dan Williams3fe07912017-10-14 17:13:45 -0700113 if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
114 /*
115 * An arch that has enabled the pmem api should also
116 * have its drivers support pfn_t_devmap()
117 *
118 * This is a developer warning and should not trigger in
119 * production. dax_flush() will crash since it depends
120 * on being able to do (page_address(pfn_to_page())).
121 */
122 WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
Dan Williamse76384882018-05-16 11:46:08 -0700123 dax_enabled = true;
Dan Williamsad428cd2019-02-20 21:12:50 -0800124 } else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
125 struct dev_pagemap *pgmap, *end_pgmap;
Dan Williamse76384882018-05-16 11:46:08 -0700126
127 pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
Dan Williamsad428cd2019-02-20 21:12:50 -0800128 end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
129 if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX
130 && pfn_t_to_page(pfn)->pgmap == pgmap
131 && pfn_t_to_page(end_pfn)->pgmap == pgmap
132 && pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
133 && pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr)))
Dan Williamse76384882018-05-16 11:46:08 -0700134 dax_enabled = true;
135 put_dev_pagemap(pgmap);
Dan Williamsad428cd2019-02-20 21:12:50 -0800136 put_dev_pagemap(end_pgmap);
137
Dan Williamse76384882018-05-16 11:46:08 -0700138 }
139
140 if (!dax_enabled) {
Darrick J. Wongba23cba2018-05-30 13:03:45 -0700141 pr_debug("%s: error: dax support not enabled\n",
142 bdevname(bdev, buf));
Dave Jiang80660f22018-05-30 13:03:46 -0700143 return false;
Dan Williams569d0362017-10-14 11:33:32 -0700144 }
Dave Jiang80660f22018-05-30 13:03:46 -0700145 return true;
Dan Williamsef510422017-05-08 10:55:27 -0700146}
Dan Williams7bf7eac2019-05-16 13:26:29 -0700147EXPORT_SYMBOL_GPL(__generic_fsdax_supported);
148
149/**
150 * __bdev_dax_supported() - Check if the device supports dax for filesystem
151 * @bdev: block device to check
152 * @blocksize: The block size of the device
153 *
154 * This is a library function for filesystems to check if the block device
155 * can be mounted with dax option.
156 *
157 * Return: true if supported, false if unsupported
158 */
159bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
160{
161 struct dax_device *dax_dev;
162 struct request_queue *q;
163 char buf[BDEVNAME_SIZE];
164 bool ret;
165 int id;
166
167 q = bdev_get_queue(bdev);
168 if (!q || !blk_queue_dax(q)) {
169 pr_debug("%s: error: request queue doesn't support dax\n",
170 bdevname(bdev, buf));
171 return false;
172 }
173
174 dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
175 if (!dax_dev) {
176 pr_debug("%s: error: device does not support dax\n",
177 bdevname(bdev, buf));
178 return false;
179 }
180
181 id = dax_read_lock();
182 ret = dax_supported(dax_dev, bdev, blocksize, 0,
183 i_size_read(bdev->bd_inode) / 512);
184 dax_read_unlock(id);
185
186 put_dax(dax_dev);
187
188 return ret;
189}
Dan Williamsef510422017-05-08 10:55:27 -0700190EXPORT_SYMBOL_GPL(__bdev_dax_supported);
Dan Williams9d109082017-05-13 16:18:21 -0700191#endif
Dan Williamsef510422017-05-08 10:55:27 -0700192
/* bit numbers used with set_bit()/test_bit() on dax_device.flags */
enum dax_device_flags {
	/* !alive + rcu grace period == no new operations / mappings */
	DAXDEV_ALIVE,
	/* gate whether dax_flush() calls the low level flush routine */
	DAXDEV_WRITE_CACHE,
};
199
Dan Williams7b6be842017-04-11 09:49:49 -0700200/**
201 * struct dax_device - anchor object for dax services
202 * @inode: core vfs
203 * @cdev: optional character interface for "device dax"
Dan Williams72058002017-04-19 15:14:31 -0700204 * @host: optional name for lookups where the device path is not available
Dan Williams7b6be842017-04-11 09:49:49 -0700205 * @private: dax driver private data
Dan Williams9a60c3e2017-06-27 17:59:28 -0700206 * @flags: state and boolean properties
Dan Williams7b6be842017-04-11 09:49:49 -0700207 */
208struct dax_device {
Dan Williams72058002017-04-19 15:14:31 -0700209 struct hlist_node list;
Dan Williams7b6be842017-04-11 09:49:49 -0700210 struct inode inode;
211 struct cdev cdev;
Dan Williams72058002017-04-19 15:14:31 -0700212 const char *host;
Dan Williams7b6be842017-04-11 09:49:49 -0700213 void *private;
Dan Williams9a60c3e2017-06-27 17:59:28 -0700214 unsigned long flags;
Dan Williams6568b082017-01-24 18:44:18 -0800215 const struct dax_operations *ops;
Dan Williams7b6be842017-04-11 09:49:49 -0700216};
217
Dan Williams6e0c90d2017-06-26 21:28:41 -0700218static ssize_t write_cache_show(struct device *dev,
219 struct device_attribute *attr, char *buf)
220{
221 struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
222 ssize_t rc;
223
224 WARN_ON_ONCE(!dax_dev);
225 if (!dax_dev)
226 return -ENXIO;
227
Ross Zwisler808c3402018-06-06 10:45:14 -0600228 rc = sprintf(buf, "%d\n", !!dax_write_cache_enabled(dax_dev));
Dan Williams6e0c90d2017-06-26 21:28:41 -0700229 put_dax(dax_dev);
230 return rc;
231}
232
233static ssize_t write_cache_store(struct device *dev,
234 struct device_attribute *attr, const char *buf, size_t len)
235{
236 bool write_cache;
237 int rc = strtobool(buf, &write_cache);
238 struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
239
240 WARN_ON_ONCE(!dax_dev);
241 if (!dax_dev)
242 return -ENXIO;
243
244 if (rc)
245 len = rc;
Dan Williams6e0c90d2017-06-26 21:28:41 -0700246 else
Ross Zwisler808c3402018-06-06 10:45:14 -0600247 dax_write_cache(dax_dev, write_cache);
Dan Williams6e0c90d2017-06-26 21:28:41 -0700248
249 put_dax(dax_dev);
250 return len;
251}
252static DEVICE_ATTR_RW(write_cache);
253
254static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
255{
256 struct device *dev = container_of(kobj, typeof(*dev), kobj);
257 struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
258
259 WARN_ON_ONCE(!dax_dev);
260 if (!dax_dev)
261 return 0;
262
Mikulas Patockac3ca0152017-08-31 21:47:43 -0400263#ifndef CONFIG_ARCH_HAS_PMEM_API
264 if (a == &dev_attr_write_cache.attr)
Dan Williams6e0c90d2017-06-26 21:28:41 -0700265 return 0;
Mikulas Patockac3ca0152017-08-31 21:47:43 -0400266#endif
Dan Williams6e0c90d2017-06-26 21:28:41 -0700267 return a->mode;
268}
269
270static struct attribute *dax_attributes[] = {
271 &dev_attr_write_cache.attr,
272 NULL,
273};
274
275struct attribute_group dax_attribute_group = {
276 .name = "dax",
277 .attrs = dax_attributes,
278 .is_visible = dax_visible,
279};
280EXPORT_SYMBOL_GPL(dax_attribute_group);
281
Dan Williamsb0686262017-01-26 20:37:35 -0800282/**
283 * dax_direct_access() - translate a device pgoff to an absolute pfn
284 * @dax_dev: a dax_device instance representing the logical memory range
285 * @pgoff: offset in pages from the start of the device to translate
286 * @nr_pages: number of consecutive pages caller can handle relative to @pfn
287 * @kaddr: output parameter that returns a virtual address mapping of pfn
288 * @pfn: output parameter that returns an absolute pfn translation of @pgoff
289 *
290 * Return: negative errno if an error occurs, otherwise the number of
291 * pages accessible at the device relative @pgoff.
292 */
293long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
294 void **kaddr, pfn_t *pfn)
295{
296 long avail;
297
Dan Williamsb0686262017-01-26 20:37:35 -0800298 if (!dax_dev)
299 return -EOPNOTSUPP;
300
301 if (!dax_alive(dax_dev))
302 return -ENXIO;
303
304 if (nr_pages < 0)
305 return nr_pages;
306
307 avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
308 kaddr, pfn);
309 if (!avail)
310 return -ERANGE;
311 return min(avail, nr_pages);
312}
313EXPORT_SYMBOL_GPL(dax_direct_access);
314
Dan Williams7bf7eac2019-05-16 13:26:29 -0700315bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
316 int blocksize, sector_t start, sector_t len)
317{
318 if (!dax_alive(dax_dev))
319 return false;
320
321 return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
322}
323
Dan Williams7e026c82017-05-29 12:57:56 -0700324size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
325 size_t bytes, struct iov_iter *i)
326{
327 if (!dax_alive(dax_dev))
328 return 0;
329
Dan Williams7e026c82017-05-29 12:57:56 -0700330 return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i);
331}
332EXPORT_SYMBOL_GPL(dax_copy_from_iter);
333
Dan Williamsb3a9a0c2018-05-02 06:46:33 -0700334size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
335 size_t bytes, struct iov_iter *i)
336{
337 if (!dax_alive(dax_dev))
338 return 0;
339
340 return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i);
341}
342EXPORT_SYMBOL_GPL(dax_copy_to_iter);
343
Mikulas Patockac3ca0152017-08-31 21:47:43 -0400344#ifdef CONFIG_ARCH_HAS_PMEM_API
345void arch_wb_cache_pmem(void *addr, size_t size);
346void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
Dan Williamsabebfbe22017-05-29 13:02:52 -0700347{
Ross Zwisler808c3402018-06-06 10:45:14 -0600348 if (unlikely(!dax_write_cache_enabled(dax_dev)))
Dan Williams6e0c90d2017-06-26 21:28:41 -0700349 return;
350
Mikulas Patockac3ca0152017-08-31 21:47:43 -0400351 arch_wb_cache_pmem(addr, size);
Dan Williamsabebfbe22017-05-29 13:02:52 -0700352}
Mikulas Patockac3ca0152017-08-31 21:47:43 -0400353#else
354void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
355{
356}
357#endif
Dan Williamsabebfbe22017-05-29 13:02:52 -0700358EXPORT_SYMBOL_GPL(dax_flush);
359
Dan Williams6e0c90d2017-06-26 21:28:41 -0700360void dax_write_cache(struct dax_device *dax_dev, bool wc)
361{
362 if (wc)
363 set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
364 else
365 clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
366}
367EXPORT_SYMBOL_GPL(dax_write_cache);
368
Vivek Goyal273752c2017-07-26 09:35:09 -0400369bool dax_write_cache_enabled(struct dax_device *dax_dev)
370{
371 return test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
372}
373EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
374
Dan Williams7b6be842017-04-11 09:49:49 -0700375bool dax_alive(struct dax_device *dax_dev)
376{
377 lockdep_assert_held(&dax_srcu);
Dan Williams9a60c3e2017-06-27 17:59:28 -0700378 return test_bit(DAXDEV_ALIVE, &dax_dev->flags);
Dan Williams7b6be842017-04-11 09:49:49 -0700379}
380EXPORT_SYMBOL_GPL(dax_alive);
381
Dan Williams72058002017-04-19 15:14:31 -0700382static int dax_host_hash(const char *host)
383{
384 return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
385}
386
Dan Williams7b6be842017-04-11 09:49:49 -0700387/*
388 * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring
389 * that any fault handlers or operations that might have seen
390 * dax_alive(), have completed. Any operations that start after
391 * synchronize_srcu() has run will abort upon seeing !dax_alive().
392 */
393void kill_dax(struct dax_device *dax_dev)
394{
395 if (!dax_dev)
396 return;
397
Dan Williams9a60c3e2017-06-27 17:59:28 -0700398 clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
Dan Williams72058002017-04-19 15:14:31 -0700399
Dan Williams7b6be842017-04-11 09:49:49 -0700400 synchronize_srcu(&dax_srcu);
Dan Williams72058002017-04-19 15:14:31 -0700401
402 spin_lock(&dax_host_lock);
403 hlist_del_init(&dax_dev->list);
404 spin_unlock(&dax_host_lock);
Dan Williams7b6be842017-04-11 09:49:49 -0700405}
406EXPORT_SYMBOL_GPL(kill_dax);
407
Dan Williams9567da02017-07-12 17:58:21 -0700408void run_dax(struct dax_device *dax_dev)
409{
410 set_bit(DAXDEV_ALIVE, &dax_dev->flags);
411}
412EXPORT_SYMBOL_GPL(run_dax);
413
Dan Williams7b6be842017-04-11 09:49:49 -0700414static struct inode *dax_alloc_inode(struct super_block *sb)
415{
416 struct dax_device *dax_dev;
Dan Williamsb9d39d12017-06-09 08:50:49 -0700417 struct inode *inode;
Dan Williams7b6be842017-04-11 09:49:49 -0700418
419 dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
Mikulas Patocka9f586ff2017-11-14 09:59:54 -0500420 if (!dax_dev)
421 return NULL;
422
Dan Williamsb9d39d12017-06-09 08:50:49 -0700423 inode = &dax_dev->inode;
424 inode->i_rdev = 0;
425 return inode;
Dan Williams7b6be842017-04-11 09:49:49 -0700426}
427
428static struct dax_device *to_dax_dev(struct inode *inode)
429{
430 return container_of(inode, struct dax_device, inode);
431}
432
Al Viro53e22822019-04-10 14:57:19 -0400433static void dax_free_inode(struct inode *inode)
Dan Williams7b6be842017-04-11 09:49:49 -0700434{
Dan Williams7b6be842017-04-11 09:49:49 -0700435 struct dax_device *dax_dev = to_dax_dev(inode);
Dan Williams72058002017-04-19 15:14:31 -0700436 kfree(dax_dev->host);
437 dax_dev->host = NULL;
Dan Williamsb9d39d12017-06-09 08:50:49 -0700438 if (inode->i_rdev)
439 ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
Dan Williams7b6be842017-04-11 09:49:49 -0700440 kmem_cache_free(dax_cache, dax_dev);
441}
442
443static void dax_destroy_inode(struct inode *inode)
444{
445 struct dax_device *dax_dev = to_dax_dev(inode);
Dan Williams9a60c3e2017-06-27 17:59:28 -0700446 WARN_ONCE(test_bit(DAXDEV_ALIVE, &dax_dev->flags),
Dan Williams7b6be842017-04-11 09:49:49 -0700447 "kill_dax() must be called before final iput()\n");
Dan Williams7b6be842017-04-11 09:49:49 -0700448}
449
450static const struct super_operations dax_sops = {
451 .statfs = simple_statfs,
452 .alloc_inode = dax_alloc_inode,
453 .destroy_inode = dax_destroy_inode,
Al Viro53e22822019-04-10 14:57:19 -0400454 .free_inode = dax_free_inode,
Dan Williams7b6be842017-04-11 09:49:49 -0700455 .drop_inode = generic_delete_inode,
456};
457
458static struct dentry *dax_mount(struct file_system_type *fs_type,
459 int flags, const char *dev_name, void *data)
460{
461 return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
462}
463
464static struct file_system_type dax_fs_type = {
465 .name = "dax",
466 .mount = dax_mount,
467 .kill_sb = kill_anon_super,
468};
469
470static int dax_test(struct inode *inode, void *data)
471{
472 dev_t devt = *(dev_t *) data;
473
474 return inode->i_rdev == devt;
475}
476
477static int dax_set(struct inode *inode, void *data)
478{
479 dev_t devt = *(dev_t *) data;
480
481 inode->i_rdev = devt;
482 return 0;
483}
484
485static struct dax_device *dax_dev_get(dev_t devt)
486{
487 struct dax_device *dax_dev;
488 struct inode *inode;
489
490 inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
491 dax_test, dax_set, &devt);
492
493 if (!inode)
494 return NULL;
495
496 dax_dev = to_dax_dev(inode);
497 if (inode->i_state & I_NEW) {
Dan Williams9a60c3e2017-06-27 17:59:28 -0700498 set_bit(DAXDEV_ALIVE, &dax_dev->flags);
Dan Williams7b6be842017-04-11 09:49:49 -0700499 inode->i_cdev = &dax_dev->cdev;
500 inode->i_mode = S_IFCHR;
501 inode->i_flags = S_DAX;
502 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
503 unlock_new_inode(inode);
504 }
505
506 return dax_dev;
507}
508
Dan Williams72058002017-04-19 15:14:31 -0700509static void dax_add_host(struct dax_device *dax_dev, const char *host)
510{
511 int hash;
512
513 /*
514 * Unconditionally init dax_dev since it's coming from a
515 * non-zeroed slab cache
516 */
517 INIT_HLIST_NODE(&dax_dev->list);
518 dax_dev->host = host;
519 if (!host)
520 return;
521
522 hash = dax_host_hash(host);
523 spin_lock(&dax_host_lock);
524 hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
525 spin_unlock(&dax_host_lock);
526}
527
Dan Williams6568b082017-01-24 18:44:18 -0800528struct dax_device *alloc_dax(void *private, const char *__host,
529 const struct dax_operations *ops)
Dan Williams7b6be842017-04-11 09:49:49 -0700530{
531 struct dax_device *dax_dev;
Dan Williams72058002017-04-19 15:14:31 -0700532 const char *host;
Dan Williams7b6be842017-04-11 09:49:49 -0700533 dev_t devt;
534 int minor;
535
Dan Williams72058002017-04-19 15:14:31 -0700536 host = kstrdup(__host, GFP_KERNEL);
537 if (__host && !host)
538 return NULL;
539
Dan Williamscf1e2282017-05-08 12:33:53 -0700540 minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
Dan Williams7b6be842017-04-11 09:49:49 -0700541 if (minor < 0)
Dan Williams72058002017-04-19 15:14:31 -0700542 goto err_minor;
Dan Williams7b6be842017-04-11 09:49:49 -0700543
544 devt = MKDEV(MAJOR(dax_devt), minor);
545 dax_dev = dax_dev_get(devt);
546 if (!dax_dev)
Dan Williams72058002017-04-19 15:14:31 -0700547 goto err_dev;
Dan Williams7b6be842017-04-11 09:49:49 -0700548
Dan Williams72058002017-04-19 15:14:31 -0700549 dax_add_host(dax_dev, host);
Dan Williams6568b082017-01-24 18:44:18 -0800550 dax_dev->ops = ops;
Dan Williams7b6be842017-04-11 09:49:49 -0700551 dax_dev->private = private;
552 return dax_dev;
553
Dan Williams72058002017-04-19 15:14:31 -0700554 err_dev:
Dan Williams7b6be842017-04-11 09:49:49 -0700555 ida_simple_remove(&dax_minor_ida, minor);
Dan Williams72058002017-04-19 15:14:31 -0700556 err_minor:
557 kfree(host);
Dan Williams7b6be842017-04-11 09:49:49 -0700558 return NULL;
559}
560EXPORT_SYMBOL_GPL(alloc_dax);
561
562void put_dax(struct dax_device *dax_dev)
563{
564 if (!dax_dev)
565 return;
566 iput(&dax_dev->inode);
567}
568EXPORT_SYMBOL_GPL(put_dax);
569
570/**
Dan Williams72058002017-04-19 15:14:31 -0700571 * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
572 * @host: alternate name for the device registered by a dax driver
573 */
574struct dax_device *dax_get_by_host(const char *host)
575{
576 struct dax_device *dax_dev, *found = NULL;
577 int hash, id;
578
579 if (!host)
580 return NULL;
581
582 hash = dax_host_hash(host);
583
584 id = dax_read_lock();
585 spin_lock(&dax_host_lock);
586 hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
587 if (!dax_alive(dax_dev)
588 || strcmp(host, dax_dev->host) != 0)
589 continue;
590
591 if (igrab(&dax_dev->inode))
592 found = dax_dev;
593 break;
594 }
595 spin_unlock(&dax_host_lock);
596 dax_read_unlock(id);
597
598 return found;
599}
600EXPORT_SYMBOL_GPL(dax_get_by_host);
601
602/**
Dan Williams7b6be842017-04-11 09:49:49 -0700603 * inode_dax: convert a public inode into its dax_dev
604 * @inode: An inode with i_cdev pointing to a dax_dev
605 *
606 * Note this is not equivalent to to_dax_dev() which is for private
607 * internal use where we know the inode filesystem type == dax_fs_type.
608 */
609struct dax_device *inode_dax(struct inode *inode)
610{
611 struct cdev *cdev = inode->i_cdev;
612
613 return container_of(cdev, struct dax_device, cdev);
614}
615EXPORT_SYMBOL_GPL(inode_dax);
616
617struct inode *dax_inode(struct dax_device *dax_dev)
618{
619 return &dax_dev->inode;
620}
621EXPORT_SYMBOL_GPL(dax_inode);
622
623void *dax_get_private(struct dax_device *dax_dev)
624{
Dan Williams9567da02017-07-12 17:58:21 -0700625 if (!test_bit(DAXDEV_ALIVE, &dax_dev->flags))
626 return NULL;
Dan Williams7b6be842017-04-11 09:49:49 -0700627 return dax_dev->private;
628}
629EXPORT_SYMBOL_GPL(dax_get_private);
630
631static void init_once(void *_dax_dev)
632{
633 struct dax_device *dax_dev = _dax_dev;
634 struct inode *inode = &dax_dev->inode;
635
Dan Williamsb9d39d12017-06-09 08:50:49 -0700636 memset(dax_dev, 0, sizeof(*dax_dev));
Dan Williams7b6be842017-04-11 09:49:49 -0700637 inode_init_once(inode);
638}
639
Dan Williams9567da02017-07-12 17:58:21 -0700640static int dax_fs_init(void)
Dan Williams7b6be842017-04-11 09:49:49 -0700641{
642 int rc;
643
644 dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
645 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
646 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
647 init_once);
648 if (!dax_cache)
649 return -ENOMEM;
650
651 rc = register_filesystem(&dax_fs_type);
652 if (rc)
653 goto err_register_fs;
654
655 dax_mnt = kern_mount(&dax_fs_type);
656 if (IS_ERR(dax_mnt)) {
657 rc = PTR_ERR(dax_mnt);
658 goto err_mount;
659 }
660 dax_superblock = dax_mnt->mnt_sb;
661
662 return 0;
663
664 err_mount:
665 unregister_filesystem(&dax_fs_type);
666 err_register_fs:
667 kmem_cache_destroy(dax_cache);
668
669 return rc;
670}
671
Dan Williams9567da02017-07-12 17:58:21 -0700672static void dax_fs_exit(void)
Dan Williams7b6be842017-04-11 09:49:49 -0700673{
674 kern_unmount(dax_mnt);
675 unregister_filesystem(&dax_fs_type);
676 kmem_cache_destroy(dax_cache);
677}
678
Dan Williams9567da02017-07-12 17:58:21 -0700679static int __init dax_core_init(void)
Dan Williams7b6be842017-04-11 09:49:49 -0700680{
681 int rc;
682
Dan Williams9567da02017-07-12 17:58:21 -0700683 rc = dax_fs_init();
Dan Williams7b6be842017-04-11 09:49:49 -0700684 if (rc)
685 return rc;
686
Dan Williamscf1e2282017-05-08 12:33:53 -0700687 rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
Dan Williams7b6be842017-04-11 09:49:49 -0700688 if (rc)
Dan Williams9567da02017-07-12 17:58:21 -0700689 goto err_chrdev;
690
691 rc = dax_bus_init();
692 if (rc)
693 goto err_bus;
694 return 0;
695
696err_bus:
697 unregister_chrdev_region(dax_devt, MINORMASK+1);
698err_chrdev:
699 dax_fs_exit();
700 return 0;
Dan Williams7b6be842017-04-11 09:49:49 -0700701}
702
Dan Williams9567da02017-07-12 17:58:21 -0700703static void __exit dax_core_exit(void)
Dan Williams7b6be842017-04-11 09:49:49 -0700704{
Dan Williamscf1e2282017-05-08 12:33:53 -0700705 unregister_chrdev_region(dax_devt, MINORMASK+1);
Dan Williams7b6be842017-04-11 09:49:49 -0700706 ida_destroy(&dax_minor_ida);
Dan Williams9567da02017-07-12 17:58:21 -0700707 dax_fs_exit();
Dan Williams7b6be842017-04-11 09:49:49 -0700708}
709
710MODULE_AUTHOR("Intel Corporation");
711MODULE_LICENSE("GPL v2");
Dan Williams9567da02017-07-12 17:58:21 -0700712subsys_initcall(dax_core_init);
713module_exit(dax_core_exit);