blob: 9aebffb4050597e1daae74f28375d86335f73e0c [file] [log] [blame]
Benny Halevyc93407d2011-05-22 19:49:06 +03001/*
2 * pNFS Objects layout implementation over open-osd initiator library
3 *
4 * Copyright (C) 2009 Panasas Inc. [year of first publication]
5 * All rights reserved.
6 *
7 * Benny Halevy <bhalevy@panasas.com>
Boaz Harroshaa281ac2014-10-19 19:38:58 +03008 * Boaz Harrosh <ooo@electrozaur.com>
Benny Halevyc93407d2011-05-22 19:49:06 +03009 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * See the file COPYING included with this distribution for more details.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the Panasas company nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40#include <linux/module.h>
Boaz Harroshaf4f5b52011-10-31 15:04:19 -070041#include <scsi/osd_ore.h>
Boaz Harrosh09f5bf42011-05-22 19:50:20 +030042
43#include "objlayout.h"
Peng Tao62965562012-09-25 14:55:57 +080044#include "../internal.h"
Boaz Harrosh09f5bf42011-05-22 19:50:20 +030045
46#define NFSDBG_FACILITY NFSDBG_PNFS_LD
47
Boaz Harroshb6c05f12011-05-26 21:45:34 +030048struct objio_dev_ent {
49 struct nfs4_deviceid_node id_node;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -070050 struct ore_dev od;
Boaz Harroshb6c05f12011-05-26 21:45:34 +030051};
52
53static void
54objio_free_deviceid_node(struct nfs4_deviceid_node *d)
55{
56 struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
57
Boaz Harroshaf4f5b52011-10-31 15:04:19 -070058 dprintk("%s: free od=%p\n", __func__, de->od.od);
59 osduld_put_device(de->od.od);
Trond Myklebust84a80f62015-03-09 15:23:35 -040060 kfree_rcu(d, rcu);
Boaz Harroshb6c05f12011-05-26 21:45:34 +030061}
62
Boaz Harrosh09f5bf42011-05-22 19:50:20 +030063struct objio_segment {
64 struct pnfs_layout_segment lseg;
65
Boaz Harroshaf4f5b52011-10-31 15:04:19 -070066 struct ore_layout layout;
67 struct ore_components oc;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +030068};
69
70static inline struct objio_segment *
71OBJIO_LSEG(struct pnfs_layout_segment *lseg)
72{
73 return container_of(lseg, struct objio_segment, lseg);
74}
75
Boaz Harrosh04f83452011-05-22 19:52:19 +030076struct objio_state {
77 /* Generic layer */
Boaz Harroshe2e04352011-10-31 15:03:35 -070078 struct objlayout_io_res oir;
Boaz Harrosh04f83452011-05-22 19:52:19 +030079
Boaz Harrosh96218552011-10-31 14:47:32 -070080 bool sync;
Boaz Harrosheecfc632011-10-31 15:15:38 -070081 /*FIXME: Support for extra_bytes at ore_get_rw_state() */
82 struct ore_io_state *ios;
Boaz Harrosh04f83452011-05-22 19:52:19 +030083};
84
Boaz Harroshb6c05f12011-05-26 21:45:34 +030085/* Send and wait for a get_device_info of devices in the layout,
86 then look them up with the osd_initiator library */
Christoph Hellwig661373b2014-09-02 21:27:57 -070087struct nfs4_deviceid_node *
88objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
89 gfp_t gfp_flags)
Boaz Harroshb6c05f12011-05-26 21:45:34 +030090{
91 struct pnfs_osd_deviceaddr *deviceaddr;
Christoph Hellwig661373b2014-09-02 21:27:57 -070092 struct objio_dev_ent *ode = NULL;
Boaz Harroshb6c05f12011-05-26 21:45:34 +030093 struct osd_dev *od;
94 struct osd_dev_info odi;
Sachin Bhamare18d98f62012-03-19 20:47:58 -070095 bool retry_flag = true;
Christoph Hellwigfd41b472014-09-10 17:36:29 -070096 __be32 *p;
Boaz Harroshb6c05f12011-05-26 21:45:34 +030097 int err;
98
Christoph Hellwig661373b2014-09-02 21:27:57 -070099 deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags);
100 if (!deviceaddr)
101 return NULL;
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300102
Christoph Hellwig661373b2014-09-02 21:27:57 -0700103 p = page_address(pdev->pages[0]);
104 pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p);
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300105
106 odi.systemid_len = deviceaddr->oda_systemid.len;
107 if (odi.systemid_len > sizeof(odi.systemid)) {
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700108 dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
109 __func__, sizeof(odi.systemid));
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300110 err = -EINVAL;
111 goto out;
112 } else if (odi.systemid_len)
113 memcpy(odi.systemid, deviceaddr->oda_systemid.data,
114 odi.systemid_len);
115 odi.osdname_len = deviceaddr->oda_osdname.len;
116 odi.osdname = (u8 *)deviceaddr->oda_osdname.data;
117
118 if (!odi.osdname_len && !odi.systemid_len) {
119 dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
120 __func__);
121 err = -ENODEV;
122 goto out;
123 }
124
Sachin Bhamare18d98f62012-03-19 20:47:58 -0700125retry_lookup:
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300126 od = osduld_info_lookup(&odi);
Viresh Kumara1c83682015-08-12 15:59:44 +0530127 if (IS_ERR(od)) {
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300128 err = PTR_ERR(od);
129 dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
Sachin Bhamare18d98f62012-03-19 20:47:58 -0700130 if (err == -ENODEV && retry_flag) {
131 err = objlayout_autologin(deviceaddr);
132 if (likely(!err)) {
133 retry_flag = false;
134 goto retry_lookup;
135 }
136 }
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300137 goto out;
138 }
139
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700140 dprintk("Adding new dev_id(%llx:%llx)\n",
Christoph Hellwig661373b2014-09-02 21:27:57 -0700141 _DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id));
142
143 ode = kzalloc(sizeof(*ode), gfp_flags);
144 if (!ode) {
145 dprintk("%s: -ENOMEM od=%p\n", __func__, od);
146 goto out;
147 }
148
149 nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id);
150 kfree(deviceaddr);
151
152 ode->od.od = od;
153 return &ode->id_node;
154
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300155out:
Christoph Hellwig661373b2014-09-02 21:27:57 -0700156 kfree(deviceaddr);
157 return NULL;
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300158}
159
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700160static void copy_single_comp(struct ore_components *oc, unsigned c,
161 struct pnfs_osd_object_cred *src_comp)
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300162{
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700163 struct ore_comp *ocomp = &oc->comps[c];
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300164
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700165 WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
166 WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300167
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700168 ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
169 ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300170
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700171 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
172}
173
Trond Myklebust1385b812012-05-04 13:54:24 -0400174static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700175 struct objio_segment **pseg)
176{
Boaz Harrosh5318a29c2012-03-13 20:44:26 -0700177/* This is the in memory structure of the objio_segment
178 *
179 * struct __alloc_objio_segment {
180 * struct objio_segment olseg;
181 * struct ore_dev *ods[numdevs];
182 * struct ore_comp comps[numdevs];
183 * } *aolseg;
184 * NOTE: The code as above compiles and runs perfectly. It is elegant,
185 * type safe and compact. At some Past time Linus has decided he does not
186 * like variable length arrays, For the sake of this principal we uglify
187 * the code as below.
188 */
189 struct objio_segment *lseg;
190 size_t lseg_size = sizeof(*lseg) +
191 numdevs * sizeof(lseg->oc.ods[0]) +
192 numdevs * sizeof(*lseg->oc.comps);
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700193
Boaz Harrosh5318a29c2012-03-13 20:44:26 -0700194 lseg = kzalloc(lseg_size, gfp_flags);
195 if (unlikely(!lseg)) {
Masanari Iidaa895d572013-04-09 02:06:50 +0900196 dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
Boaz Harrosh5318a29c2012-03-13 20:44:26 -0700197 numdevs, lseg_size);
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700198 return -ENOMEM;
199 }
200
Boaz Harrosh5318a29c2012-03-13 20:44:26 -0700201 lseg->oc.numdevs = numdevs;
202 lseg->oc.single_comp = EC_MULTPLE_COMPS;
203 lseg->oc.ods = (void *)(lseg + 1);
204 lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700205
Boaz Harrosh5318a29c2012-03-13 20:44:26 -0700206 *pseg = lseg;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700207 return 0;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300208}
209
210int objio_alloc_lseg(struct pnfs_layout_segment **outp,
211 struct pnfs_layout_hdr *pnfslay,
212 struct pnfs_layout_range *range,
213 struct xdr_stream *xdr,
214 gfp_t gfp_flags)
215{
Christoph Hellwig661373b2014-09-02 21:27:57 -0700216 struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode);
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300217 struct objio_segment *objio_seg;
218 struct pnfs_osd_xdr_decode_layout_iter iter;
219 struct pnfs_osd_layout layout;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700220 struct pnfs_osd_object_cred src_comp;
221 unsigned cur_comp;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300222 int err;
223
224 err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
225 if (unlikely(err))
226 return err;
227
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700228 err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300229 if (unlikely(err))
230 return err;
231
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700232 objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
233 objio_seg->layout.group_width = layout.olo_map.odm_group_width;
234 objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
235 objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
236 objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300237
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700238 err = ore_verify_layout(layout.olo_map.odm_num_comps,
239 &objio_seg->layout);
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300240 if (unlikely(err))
241 goto err;
242
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700243 objio_seg->oc.first_dev = layout.olo_comps_index;
244 cur_comp = 0;
245 while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
Christoph Hellwig661373b2014-09-02 21:27:57 -0700246 struct nfs4_deviceid_node *d;
247 struct objio_dev_ent *ode;
248
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700249 copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
Christoph Hellwig661373b2014-09-02 21:27:57 -0700250
251 d = nfs4_find_get_deviceid(server,
252 &src_comp.oc_object_id.oid_device_id,
253 pnfslay->plh_lc_cred, gfp_flags);
254 if (!d) {
255 err = -ENXIO;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700256 goto err;
Christoph Hellwig661373b2014-09-02 21:27:57 -0700257 }
258
259 ode = container_of(d, struct objio_dev_ent, id_node);
260 objio_seg->oc.ods[cur_comp++] = &ode->od;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300261 }
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700262 /* pnfs_osd_xdr_decode_layout_comp returns false on error */
263 if (unlikely(err))
264 goto err;
Boaz Harrosh93420772011-05-25 21:25:29 +0300265
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300266 *outp = &objio_seg->lseg;
267 return 0;
268
269err:
270 kfree(objio_seg);
271 dprintk("%s: Error: return %d\n", __func__, err);
272 *outp = NULL;
273 return err;
274}
275
276void objio_free_lseg(struct pnfs_layout_segment *lseg)
277{
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300278 int i;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300279 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
280
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700281 for (i = 0; i < objio_seg->oc.numdevs; i++) {
282 struct ore_dev *od = objio_seg->oc.ods[i];
283 struct objio_dev_ent *ode;
284
285 if (!od)
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300286 break;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700287 ode = container_of(od, typeof(*ode), od);
288 nfs4_put_deviceid_node(&ode->id_node);
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300289 }
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300290 kfree(objio_seg);
291}
292
Boaz Harrosh96218552011-10-31 14:47:32 -0700293static int
Boaz Harrosheecfc632011-10-31 15:15:38 -0700294objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
Boaz Harrosh96218552011-10-31 14:47:32 -0700295 struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
296 loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
297 struct objio_state **outp)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300298{
299 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
Boaz Harrosheecfc632011-10-31 15:15:38 -0700300 struct ore_io_state *ios;
301 int ret;
Boaz Harrosh96218552011-10-31 14:47:32 -0700302 struct __alloc_objio_state {
303 struct objio_state objios;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700304 struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
Boaz Harrosh96218552011-10-31 14:47:32 -0700305 } *aos;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300306
Boaz Harrosh96218552011-10-31 14:47:32 -0700307 aos = kzalloc(sizeof(*aos), gfp_flags);
308 if (unlikely(!aos))
Boaz Harrosh04f83452011-05-22 19:52:19 +0300309 return -ENOMEM;
310
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700311 objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
Boaz Harrosh96218552011-10-31 14:47:32 -0700312 aos->ioerrs, rpcdata, pnfs_layout_type);
313
Boaz Harrosheecfc632011-10-31 15:15:38 -0700314 ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
315 offset, count, &ios);
316 if (unlikely(ret)) {
317 kfree(aos);
318 return ret;
319 }
320
Boaz Harrosh96218552011-10-31 14:47:32 -0700321 ios->pages = pages;
322 ios->pgbase = pgbase;
Boaz Harrosheecfc632011-10-31 15:15:38 -0700323 ios->private = aos;
Boaz Harrosh96218552011-10-31 14:47:32 -0700324 BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
325
Boaz Harrosheecfc632011-10-31 15:15:38 -0700326 aos->objios.sync = 0;
327 aos->objios.ios = ios;
328 *outp = &aos->objios;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300329 return 0;
330}
331
Boaz Harroshe2e04352011-10-31 15:03:35 -0700332void objio_free_result(struct objlayout_io_res *oir)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300333{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700334 struct objio_state *objios = container_of(oir, struct objio_state, oir);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300335
Boaz Harrosheecfc632011-10-31 15:15:38 -0700336 ore_put_io_state(objios->ios);
337 kfree(objios);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300338}
339
Trond Myklebust2e928e42012-10-16 12:34:56 -0400340static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
Boaz Harroshadb58532011-05-26 21:49:46 +0300341{
342 switch (oep) {
343 case OSD_ERR_PRI_NO_ERROR:
344 return (enum pnfs_osd_errno)0;
345
346 case OSD_ERR_PRI_CLEAR_PAGES:
347 BUG_ON(1);
348 return 0;
349
350 case OSD_ERR_PRI_RESOURCE:
351 return PNFS_OSD_ERR_RESOURCE;
352 case OSD_ERR_PRI_BAD_CRED:
353 return PNFS_OSD_ERR_BAD_CRED;
354 case OSD_ERR_PRI_NO_ACCESS:
355 return PNFS_OSD_ERR_NO_ACCESS;
356 case OSD_ERR_PRI_UNREACHABLE:
357 return PNFS_OSD_ERR_UNREACHABLE;
358 case OSD_ERR_PRI_NOT_FOUND:
359 return PNFS_OSD_ERR_NOT_FOUND;
360 case OSD_ERR_PRI_NO_SPACE:
361 return PNFS_OSD_ERR_NO_SPACE;
362 default:
363 WARN_ON(1);
364 /* fallthrough */
365 case OSD_ERR_PRI_EIO:
366 return PNFS_OSD_ERR_EIO;
367 }
368}
369
Boaz Harrosheecfc632011-10-31 15:15:38 -0700370static void __on_dev_error(struct ore_io_state *ios,
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700371 struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
372 u64 dev_offset, u64 dev_len)
373{
374 struct objio_state *objios = ios->private;
375 struct pnfs_osd_objid pooid;
376 struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
377 /* FIXME: what to do with more-then-one-group layouts. We need to
378 * translate from ore_io_state index to oc->comps index
379 */
380 unsigned comp = dev_index;
381
382 pooid.oid_device_id = ode->id_node.deviceid;
383 pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
384 pooid.oid_object_id = ios->oc->comps[comp].obj.id;
385
386 objlayout_io_set_result(&objios->oir, comp,
387 &pooid, osd_pri_2_pnfs_err(oep),
Boaz Harrosheecfc632011-10-31 15:15:38 -0700388 dev_offset, dev_len, !ios->reading);
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700389}
390
Boaz Harrosh04f83452011-05-22 19:52:19 +0300391/*
392 * read
393 */
Boaz Harrosheecfc632011-10-31 15:15:38 -0700394static void _read_done(struct ore_io_state *ios, void *private)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300395{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700396 struct objio_state *objios = private;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300397 ssize_t status;
Boaz Harrosheecfc632011-10-31 15:15:38 -0700398 int ret = ore_check_io(ios, &__on_dev_error);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300399
Boaz Harrosheecfc632011-10-31 15:15:38 -0700400 /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
Boaz Harrosh04f83452011-05-22 19:52:19 +0300401
402 if (likely(!ret))
403 status = ios->length;
404 else
405 status = ret;
406
Boaz Harrosheecfc632011-10-31 15:15:38 -0700407 objlayout_read_done(&objios->oir, status, objios->sync);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300408}
409
Weston Andros Adamsond45f60c2014-06-09 11:48:35 -0400410int objio_read_pagelist(struct nfs_pgio_header *hdr)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300411{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700412 struct objio_state *objios;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300413 int ret;
414
Fred Isamancd841602012-04-20 14:47:44 -0400415 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
Weston Andros Adamsond45f60c2014-06-09 11:48:35 -0400416 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
417 hdr->args.offset, hdr->args.count, hdr,
Boaz Harrosheecfc632011-10-31 15:15:38 -0700418 GFP_KERNEL, &objios);
Boaz Harrosh96218552011-10-31 14:47:32 -0700419 if (unlikely(ret))
420 return ret;
421
Boaz Harrosheecfc632011-10-31 15:15:38 -0700422 objios->ios->done = _read_done;
423 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
Weston Andros Adamsond45f60c2014-06-09 11:48:35 -0400424 hdr->args.offset, hdr->args.count);
Boaz Harrosh9909d452012-06-08 05:29:40 +0300425 ret = ore_read(objios->ios);
426 if (unlikely(ret))
427 objio_free_result(&objios->oir);
428 return ret;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300429}
430
431/*
432 * write
433 */
Boaz Harrosheecfc632011-10-31 15:15:38 -0700434static void _write_done(struct ore_io_state *ios, void *private)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300435{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700436 struct objio_state *objios = private;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300437 ssize_t status;
Boaz Harrosheecfc632011-10-31 15:15:38 -0700438 int ret = ore_check_io(ios, &__on_dev_error);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300439
Boaz Harrosheecfc632011-10-31 15:15:38 -0700440 /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
Boaz Harrosh04f83452011-05-22 19:52:19 +0300441
442 if (likely(!ret)) {
443 /* FIXME: should be based on the OSD's persistence model
444 * See OSD2r05 Section 4.13 Data persistence model */
Boaz Harrosheecfc632011-10-31 15:15:38 -0700445 objios->oir.committed = NFS_FILE_SYNC;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300446 status = ios->length;
447 } else {
448 status = ret;
449 }
450
Boaz Harrosheecfc632011-10-31 15:15:38 -0700451 objlayout_write_done(&objios->oir, status, objios->sync);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300452}
453
Boaz Harrosh278c0232011-10-31 15:16:54 -0700454static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
455{
456 struct objio_state *objios = priv;
Weston Andros Adamsond45f60c2014-06-09 11:48:35 -0400457 struct nfs_pgio_header *hdr = objios->oir.rpcdata;
458 struct address_space *mapping = hdr->inode->i_mapping;
Boaz Harrosh278c0232011-10-31 15:16:54 -0700459 pgoff_t index = offset / PAGE_SIZE;
Boaz Harroshc999ff62012-06-08 02:02:30 +0300460 struct page *page;
Weston Andros Adamsond45f60c2014-06-09 11:48:35 -0400461 loff_t i_size = i_size_read(hdr->inode);
Boaz Harrosh278c0232011-10-31 15:16:54 -0700462
Boaz Harroshc999ff62012-06-08 02:02:30 +0300463 if (offset >= i_size) {
464 *uptodate = true;
465 dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
466 return ZERO_PAGE(0);
467 }
468
469 page = find_get_page(mapping, index);
Boaz Harrosh278c0232011-10-31 15:16:54 -0700470 if (!page) {
Fred Isamancd841602012-04-20 14:47:44 -0400471 page = find_or_create_page(mapping, index, GFP_NOFS);
Boaz Harrosh278c0232011-10-31 15:16:54 -0700472 if (unlikely(!page)) {
473 dprintk("%s: grab_cache_page Failed index=0x%lx\n",
474 __func__, index);
475 return NULL;
476 }
477 unlock_page(page);
478 }
Hugh Dickins3066a962015-12-11 13:40:38 -0800479 *uptodate = PageUptodate(page);
Boaz Harrosh278c0232011-10-31 15:16:54 -0700480 dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
481 return page;
482}
483
484static void __r4w_put_page(void *priv, struct page *page)
485{
Boaz Harroshc999ff62012-06-08 02:02:30 +0300486 dprintk("%s: index=0x%lx\n", __func__,
487 (page == ZERO_PAGE(0)) ? -1UL : page->index);
488 if (ZERO_PAGE(0) != page)
489 page_cache_release(page);
Boaz Harrosh278c0232011-10-31 15:16:54 -0700490 return;
491}
492
493static const struct _ore_r4w_op _r4w_op = {
494 .get_page = &__r4w_get_page,
495 .put_page = &__r4w_put_page,
496};
497
Weston Andros Adamsond45f60c2014-06-09 11:48:35 -0400498int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300499{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700500 struct objio_state *objios;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300501 int ret;
502
Fred Isamancd841602012-04-20 14:47:44 -0400503 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
Weston Andros Adamsond45f60c2014-06-09 11:48:35 -0400504 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
505 hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
Boaz Harrosheecfc632011-10-31 15:15:38 -0700506 &objios);
Boaz Harrosh96218552011-10-31 14:47:32 -0700507 if (unlikely(ret))
508 return ret;
509
Boaz Harrosheecfc632011-10-31 15:15:38 -0700510 objios->sync = 0 != (how & FLUSH_SYNC);
Boaz Harrosh278c0232011-10-31 15:16:54 -0700511 objios->ios->r4w = &_r4w_op;
Boaz Harrosh96218552011-10-31 14:47:32 -0700512
Boaz Harrosheecfc632011-10-31 15:15:38 -0700513 if (!objios->sync)
514 objios->ios->done = _write_done;
515
516 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
Weston Andros Adamsond45f60c2014-06-09 11:48:35 -0400517 hdr->args.offset, hdr->args.count);
Boaz Harrosheecfc632011-10-31 15:15:38 -0700518 ret = ore_write(objios->ios);
Boaz Harrosh9909d452012-06-08 05:29:40 +0300519 if (unlikely(ret)) {
520 objio_free_result(&objios->oir);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300521 return ret;
Boaz Harrosh9909d452012-06-08 05:29:40 +0300522 }
Boaz Harrosh04f83452011-05-22 19:52:19 +0300523
Boaz Harrosheecfc632011-10-31 15:15:38 -0700524 if (objios->sync)
525 _write_done(objios->ios, objios);
526
527 return 0;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300528}
529
Weston Andros Adamsonb4fdac12014-05-15 11:56:43 -0400530/*
531 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
532 * of bytes (maximum @req->wb_bytes) that can be coalesced.
533 */
534static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
Boaz Harrosh93420772011-05-25 21:25:29 +0300535 struct nfs_page *prev, struct nfs_page *req)
536{
Peng Tao48d635f2014-11-10 08:35:35 +0800537 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio);
Weston Andros Adamson0f9c429e2014-05-15 11:56:51 -0400538 unsigned int size;
539
540 size = pnfs_generic_pg_test(pgio, prev, req);
541
Weston Andros Adamsona7d42dd2014-09-19 10:55:07 -0400542 if (!size || mirror->pg_count + req->wb_bytes >
Weston Andros Adamsonb4fdac12014-05-15 11:56:43 -0400543 (unsigned long)pgio->pg_layout_private)
544 return 0;
Boaz Harrosh93420772011-05-25 21:25:29 +0300545
Weston Andros Adamson0f9c429e2014-05-15 11:56:51 -0400546 return min(size, req->wb_bytes);
Boaz Harrosh7de6e282012-08-02 15:38:23 +0300547}
548
Trond Myklebust2e928e42012-10-16 12:34:56 -0400549static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
Boaz Harrosh7de6e282012-08-02 15:38:23 +0300550{
551 pnfs_generic_pg_init_read(pgio, req);
552 if (unlikely(pgio->pg_lseg == NULL))
553 return; /* Not pNFS */
554
555 pgio->pg_layout_private = (void *)
556 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
557}
558
559static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
560 unsigned long *stripe_end)
561{
562 u32 stripe_off;
563 unsigned stripe_size;
564
565 if (layout->raid_algorithm == PNFS_OSD_RAID_0)
566 return true;
567
568 stripe_size = layout->stripe_unit *
569 (layout->group_width - layout->parity);
570
571 div_u64_rem(offset, stripe_size, &stripe_off);
572 if (!stripe_off)
573 return true;
574
575 *stripe_end = stripe_size - stripe_off;
576 return false;
577}
578
Trond Myklebust2e928e42012-10-16 12:34:56 -0400579static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
Boaz Harrosh7de6e282012-08-02 15:38:23 +0300580{
581 unsigned long stripe_end = 0;
Peng Tao62965562012-09-25 14:55:57 +0800582 u64 wb_size;
Boaz Harrosh7de6e282012-08-02 15:38:23 +0300583
Peng Tao62965562012-09-25 14:55:57 +0800584 if (pgio->pg_dreq == NULL)
585 wb_size = i_size_read(pgio->pg_inode) - req_offset(req);
586 else
587 wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
588
589 pnfs_generic_pg_init_write(pgio, req, wb_size);
Boaz Harrosh7de6e282012-08-02 15:38:23 +0300590 if (unlikely(pgio->pg_lseg == NULL))
591 return; /* Not pNFS */
592
593 if (req->wb_offset ||
594 !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
595 &OBJIO_LSEG(pgio->pg_lseg)->layout,
596 &stripe_end)) {
597 pgio->pg_layout_private = (void *)stripe_end;
598 } else {
599 pgio->pg_layout_private = (void *)
600 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
601 }
Boaz Harrosh93420772011-05-25 21:25:29 +0300602}
603
Trond Myklebust1751c362011-06-10 13:30:23 -0400604static const struct nfs_pageio_ops objio_pg_read_ops = {
Boaz Harrosh7de6e282012-08-02 15:38:23 +0300605 .pg_init = objio_init_read,
Trond Myklebust1751c362011-06-10 13:30:23 -0400606 .pg_test = objio_pg_test,
Trond Myklebust493292d2011-07-13 15:58:28 -0400607 .pg_doio = pnfs_generic_pg_readpages,
Weston Andros Adamson180bb5e2014-09-10 15:48:01 -0400608 .pg_cleanup = pnfs_generic_pg_cleanup,
Trond Myklebust1751c362011-06-10 13:30:23 -0400609};
610
611static const struct nfs_pageio_ops objio_pg_write_ops = {
Boaz Harrosh7de6e282012-08-02 15:38:23 +0300612 .pg_init = objio_init_write,
Trond Myklebust1751c362011-06-10 13:30:23 -0400613 .pg_test = objio_pg_test,
Trond Myklebustdce81292011-07-13 15:59:19 -0400614 .pg_doio = pnfs_generic_pg_writepages,
Weston Andros Adamson180bb5e2014-09-10 15:48:01 -0400615 .pg_cleanup = pnfs_generic_pg_cleanup,
Trond Myklebust1751c362011-06-10 13:30:23 -0400616};
617
Benny Halevyc93407d2011-05-22 19:49:06 +0300618static struct pnfs_layoutdriver_type objlayout_type = {
619 .id = LAYOUT_OSD2_OBJECTS,
620 .name = "LAYOUT_OSD2_OBJECTS",
Boaz Harroshfe0fe832012-01-06 09:31:20 +0200621 .flags = PNFS_LAYOUTRET_ON_SETATTR |
622 PNFS_LAYOUTRET_ON_ERROR,
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300623
Christoph Hellwig661373b2014-09-02 21:27:57 -0700624 .max_deviceinfo_size = PAGE_SIZE,
fanchaoting5a12cca2013-02-04 21:15:02 +0800625 .owner = THIS_MODULE,
Benny Halevye51b8412011-05-22 19:51:48 +0300626 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
627 .free_layout_hdr = objlayout_free_layout_hdr,
628
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300629 .alloc_lseg = objlayout_alloc_lseg,
630 .free_lseg = objlayout_free_lseg,
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300631
Boaz Harrosh04f83452011-05-22 19:52:19 +0300632 .read_pagelist = objlayout_read_pagelist,
633 .write_pagelist = objlayout_write_pagelist,
Trond Myklebust1751c362011-06-10 13:30:23 -0400634 .pg_read_ops = &objio_pg_read_ops,
635 .pg_write_ops = &objio_pg_write_ops,
Boaz Harrosh04f83452011-05-22 19:52:19 +0300636
Trond Myklebust5bb89b42015-03-25 14:14:42 -0400637 .sync = pnfs_generic_sync,
638
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300639 .free_deviceid_node = objio_free_deviceid_node,
Boaz Harroshadb58532011-05-26 21:49:46 +0300640
Boaz Harrosha0fe8bf2011-05-22 19:54:13 +0300641 .encode_layoutcommit = objlayout_encode_layoutcommit,
Boaz Harroshadb58532011-05-26 21:49:46 +0300642 .encode_layoutreturn = objlayout_encode_layoutreturn,
Benny Halevyc93407d2011-05-22 19:49:06 +0300643};
644
645MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
646MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
647MODULE_LICENSE("GPL");
648
649static int __init
650objlayout_init(void)
651{
652 int ret = pnfs_register_layoutdriver(&objlayout_type);
653
654 if (ret)
655 printk(KERN_INFO
Weston Andros Adamsona0308892012-01-26 13:32:23 -0500656 "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
Benny Halevyc93407d2011-05-22 19:49:06 +0300657 __func__, ret);
658 else
Weston Andros Adamsona0308892012-01-26 13:32:23 -0500659 printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
Benny Halevyc93407d2011-05-22 19:49:06 +0300660 __func__);
661 return ret;
662}
663
664static void __exit
665objlayout_exit(void)
666{
667 pnfs_unregister_layoutdriver(&objlayout_type);
Weston Andros Adamsona0308892012-01-26 13:32:23 -0500668 printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
Benny Halevyc93407d2011-05-22 19:49:06 +0300669 __func__);
670}
671
J. Bruce Fieldsf85ef692011-07-15 19:18:42 -0400672MODULE_ALIAS("nfs-layouttype4-2");
673
Benny Halevyc93407d2011-05-22 19:49:06 +0300674module_init(objlayout_init);
675module_exit(objlayout_exit);