// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

6#include <linux/gfp.h>
7#include <rdma/ib_verbs.h>
8#include <linux/dma-mapping.h>
9#include <linux/slab.h>
10#include <linux/sched/mm.h>
11#include <linux/resource.h>
12
13#include "siw.h"
14#include "siw_mem.h"
15
16/*
17 * Stag lookup is based on its index part only (24 bits).
18 * The code avoids special Stag of zero and tries to randomize
19 * STag values between 1 and SIW_STAG_MAX_INDEX.
20 */
21int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
22{
23 struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
24 u32 id, next;
25
26 get_random_bytes(&next, 4);
27 next &= 0x00ffffff;
28
29 if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
30 GFP_KERNEL) < 0)
31 return -ENOMEM;
32
33 /* Set the STag index part */
34 m->stag = id << 8;
35
36 siw_dbg_mem(m, "new MEM object\n");
37
38 return 0;
39}
40
41/*
42 * siw_mem_id2obj()
43 *
44 * resolves memory from stag given by id. might be called from:
45 * o process context before sending out of sgl, or
46 * o in softirq when resolving target memory
47 */
48struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
49{
50 struct siw_mem *mem;
51
52 rcu_read_lock();
53 mem = xa_load(&sdev->mem_xa, stag_index);
54 if (likely(mem && kref_get_unless_zero(&mem->ref))) {
55 rcu_read_unlock();
56 return mem;
57 }
58 rcu_read_unlock();
59
60 return NULL;
61}
62
63static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
64 bool dirty)
65{
66 struct page **p = chunk->plist;
67
68 while (num_pages--) {
69 if (!PageDirty(*p) && dirty)
70 put_user_pages_dirty_lock(p, 1);
71 else
72 put_user_page(*p);
73 p++;
74 }
75}
76
77void siw_umem_release(struct siw_umem *umem, bool dirty)
78{
79 struct mm_struct *mm_s = umem->owning_mm;
80 int i, num_pages = umem->num_pages;
81
82 for (i = 0; num_pages; i++) {
83 int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
84
85 siw_free_plist(&umem->page_chunk[i], to_free,
86 umem->writable && dirty);
87 kfree(umem->page_chunk[i].plist);
88 num_pages -= to_free;
89 }
90 atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
91
92 mmdrop(mm_s);
93 kfree(umem->page_chunk);
94 kfree(umem);
95}
96
97int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
98 u64 start, u64 len, int rights)
99{
100 struct siw_device *sdev = to_siw_dev(pd->device);
101 struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
102 struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
103 u32 id, next;
104
105 if (!mem)
106 return -ENOMEM;
107
108 mem->mem_obj = mem_obj;
109 mem->stag_valid = 0;
110 mem->sdev = sdev;
111 mem->va = start;
112 mem->len = len;
113 mem->pd = pd;
114 mem->perms = rights & IWARP_ACCESS_MASK;
115 kref_init(&mem->ref);
116
117 mr->mem = mem;
118
119 get_random_bytes(&next, 4);
120 next &= 0x00ffffff;
121
122 if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
123 GFP_KERNEL) < 0) {
124 kfree(mem);
125 return -ENOMEM;
126 }
127 /* Set the STag index part */
128 mem->stag = id << 8;
129 mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
130
131 return 0;
132}
133
/*
 * siw_mr_drop_mem()
 *
 * Invalidate the MR's STag, remove the memory object from the
 * device xarray and drop the reference taken at creation time.
 * The object itself is freed via siw_free_mem() once the last
 * reference goes away.
 */
void siw_mr_drop_mem(struct siw_mr *mr)
{
	struct siw_mem *mem = mr->mem, *found;

	mem->stag_valid = 0;

	/* make STag invalid visible asap */
	smp_mb();

	/* The xarray is keyed by the 24-bit index part of the STag
	 * (STag >> 8), matching the id << 8 encoding at insert time.
	 */
	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
	/* The slot must still have held exactly this object */
	WARN_ON(found != mem);
	siw_mem_put(mem);
}

148void siw_free_mem(struct kref *ref)
149{
150 struct siw_mem *mem = container_of(ref, struct siw_mem, ref);
151
152 siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");
153
154 if (!mem->is_mw && mem->mem_obj) {
155 if (mem->is_pbl == 0)
156 siw_umem_release(mem->umem, true);
157 else
158 kfree(mem->pbl);
159 }
160 kfree(mem);
161}
162
163/*
164 * siw_check_mem()
165 *
166 * Check protection domain, STAG state, access permissions and
167 * address range for memory object.
168 *
169 * @pd: Protection Domain memory should belong to
170 * @mem: memory to be checked
171 * @addr: starting addr of mem
172 * @perms: requested access permissions
173 * @len: len of memory interval to be checked
174 *
175 */
176int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
177 enum ib_access_flags perms, int len)
178{
179 if (!mem->stag_valid) {
180 siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
181 return -E_STAG_INVALID;
182 }
183 if (mem->pd != pd) {
184 siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
185 return -E_PD_MISMATCH;
186 }
187 /*
188 * check access permissions
189 */
190 if ((mem->perms & perms) < perms) {
191 siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
192 mem->perms, perms);
193 return -E_ACCESS_PERM;
194 }
195 /*
196 * Check if access falls into valid memory interval.
197 */
198 if (addr < mem->va || addr + len > mem->va + mem->len) {
199 siw_dbg_pd(pd, "MEM interval len %d\n", len);
Bernard Metzlerc5362772019-08-22 19:37:38 +0200200 siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
201 (void *)(uintptr_t)addr,
202 (void *)(uintptr_t)(addr + len));
203 siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
204 (void *)(uintptr_t)mem->va,
205 (void *)(uintptr_t)(mem->va + mem->len),
Bernard Metzler22513342019-06-20 18:21:28 +0200206 mem->stag);
207
208 return -E_BASE_BOUNDS;
209 }
210 return E_ACCESS_OK;
211}
212
213/*
214 * siw_check_sge()
215 *
216 * Check SGE for access rights in given interval
217 *
218 * @pd: Protection Domain memory should belong to
219 * @sge: SGE to be checked
220 * @mem: location of memory reference within array
221 * @perms: requested access permissions
222 * @off: starting offset in SGE
223 * @len: len of memory interval to be checked
224 *
225 * NOTE: Function references SGE's memory object (mem->obj)
226 * if not yet done. New reference is kept if check went ok and
227 * released if check failed. If mem->obj is already valid, no new
228 * lookup is being done and mem is not released it check fails.
229 */
230int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
231 enum ib_access_flags perms, u32 off, int len)
232{
233 struct siw_device *sdev = to_siw_dev(pd->device);
234 struct siw_mem *new = NULL;
235 int rv = E_ACCESS_OK;
236
237 if (len + off > sge->length) {
238 rv = -E_BASE_BOUNDS;
239 goto fail;
240 }
241 if (*mem == NULL) {
242 new = siw_mem_id2obj(sdev, sge->lkey >> 8);
243 if (unlikely(!new)) {
244 siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
245 rv = -E_STAG_INVALID;
246 goto fail;
247 }
248 *mem = new;
249 }
250 /* Check if user re-registered with different STag key */
251 if (unlikely((*mem)->stag != sge->lkey)) {
252 siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
253 rv = -E_STAG_INVALID;
254 goto fail;
255 }
256 rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
257 if (unlikely(rv))
258 goto fail;
259
260 return 0;
261
262fail:
263 if (new) {
264 *mem = NULL;
265 siw_mem_put(new);
266 }
267 return rv;
268}
269
270void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
271{
272 switch (op) {
273 case SIW_OP_SEND:
274 case SIW_OP_WRITE:
275 case SIW_OP_SEND_WITH_IMM:
276 case SIW_OP_SEND_REMOTE_INV:
277 case SIW_OP_READ:
278 case SIW_OP_READ_LOCAL_INV:
279 if (!(wqe->sqe.flags & SIW_WQE_INLINE))
280 siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
281 break;
282
283 case SIW_OP_RECEIVE:
284 siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
285 break;
286
287 case SIW_OP_READ_RESPONSE:
288 siw_unref_mem_sgl(wqe->mem, 1);
289 break;
290
291 default:
292 /*
293 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
294 * do not hold memory references
295 */
296 break;
297 }
298}
299
300int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
301{
302 struct siw_device *sdev = to_siw_dev(pd->device);
303 struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
304 int rv = 0;
305
306 if (unlikely(!mem)) {
307 siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
308 return -EINVAL;
309 }
310 if (unlikely(mem->pd != pd)) {
311 siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
312 rv = -EACCES;
313 goto out;
314 }
315 /*
316 * Per RDMA verbs definition, an STag may already be in invalid
317 * state if invalidation is requested. So no state check here.
318 */
319 mem->stag_valid = 0;
320
321 siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
322out:
323 siw_mem_put(mem);
324 return rv;
325}
326
327/*
328 * Gets physical address backed by PBL element. Address is referenced
329 * by linear byte offset into list of variably sized PB elements.
330 * Optionally, provides remaining len within current element, and
331 * current PBL index for later resume at same element.
332 */
Bernard Metzlerc5362772019-08-22 19:37:38 +0200333dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
Bernard Metzler22513342019-06-20 18:21:28 +0200334{
335 int i = idx ? *idx : 0;
336
337 while (i < pbl->num_buf) {
338 struct siw_pble *pble = &pbl->pbe[i];
339
340 if (pble->pbl_off + pble->size > off) {
341 u64 pble_off = off - pble->pbl_off;
342
343 if (len)
344 *len = pble->size - pble_off;
345 if (idx)
346 *idx = i;
347
348 return pble->addr + pble_off;
349 }
350 i++;
351 }
352 if (len)
353 *len = 0;
354 return 0;
355}
356
357struct siw_pbl *siw_pbl_alloc(u32 num_buf)
358{
359 struct siw_pbl *pbl;
360 int buf_size = sizeof(*pbl);
361
362 if (num_buf == 0)
363 return ERR_PTR(-EINVAL);
364
365 buf_size += ((num_buf - 1) * sizeof(struct siw_pble));
366
367 pbl = kzalloc(buf_size, GFP_KERNEL);
368 if (!pbl)
369 return ERR_PTR(-ENOMEM);
370
371 pbl->max_buf = num_buf;
372
373 return pbl;
374}
375
/*
 * siw_umem_get()
 *
 * Pin the user memory range [start, start + len) and record the
 * pages in chunked page lists. Accounts pinned pages against the
 * owning mm's RLIMIT_MEMLOCK and takes an mmgrab() reference that
 * siw_umem_release() drops again.
 *
 * Returns the new siw_umem or an ERR_PTR on failure.
 */
struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
{
	struct siw_umem *umem;
	struct mm_struct *mm_s;
	u64 first_page_va;
	unsigned long mlock_limit;
	unsigned int foll_flags = FOLL_WRITE;
	int num_pages, num_chunks, i, rv = 0;

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	if (!len)
		return ERR_PTR(-EINVAL);

	/* Page-align the range; len != 0 guarantees num_pages >= 1 */
	first_page_va = start & PAGE_MASK;
	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
	/* NOTE(review): this over-allocates one chunk descriptor when
	 * num_pages is an exact multiple of PAGES_PER_CHUNK.
	 */
	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	mm_s = current->mm;
	umem->owning_mm = mm_s;
	umem->writable = writable;

	/* Keep the mm_struct alive until siw_umem_release() */
	mmgrab(mm_s);

	/* For read-only pins, FOLL_FORCE allows the FOLL_WRITE fault
	 * needed to break COW — presumably matching other RDMA
	 * drivers' longterm-pin behavior; confirm against GUP docs.
	 */
	if (!writable)
		foll_flags |= FOLL_FORCE;

	down_read(&mm_s->mmap_sem);

	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	/* Enforce the memlock rlimit including already-pinned pages */
	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	umem->fp_addr = first_page_va;

	umem->page_chunk =
		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
	if (!umem->page_chunk) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	/* Pin up to PAGES_PER_CHUNK pages per chunk; get_user_pages()
	 * may pin fewer than requested per call, so retry within the
	 * chunk until nents is satisfied or an error occurs.
	 */
	for (i = 0; num_pages; i++) {
		int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);

		umem->page_chunk[i].plist =
			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
		if (!umem->page_chunk[i].plist) {
			rv = -ENOMEM;
			goto out_sem_up;
		}
		got = 0;
		while (nents) {
			struct page **plist = &umem->page_chunk[i].plist[got];

			rv = get_user_pages(first_page_va, nents,
					    foll_flags | FOLL_LONGTERM,
					    plist, NULL);
			if (rv < 0)
				goto out_sem_up;

			/* Account pages as they are pinned so a partial
			 * failure is unwound correctly by
			 * siw_umem_release() below.
			 */
			umem->num_pages += rv;
			atomic64_add(rv, &mm_s->pinned_vm);
			first_page_va += rv * PAGE_SIZE;
			nents -= rv;
			got += rv;
		}
		num_pages -= got;
	}
out_sem_up:
	up_read(&mm_s->mmap_sem);

	/* On success rv holds the (positive) result of the last
	 * get_user_pages() call; all failure paths set rv <= 0.
	 */
	if (rv > 0)
		return umem;

	/* Releases the pages pinned so far, the pinned_vm accounting
	 * and the mmgrab() reference; dirty=false since nothing was
	 * handed out yet.
	 */
	siw_umem_release(umem, false);

	return ERR_PTR(rv);
}