blob: 9baa7f04e8d061698eb9540d4f13f5cab63a1a15 [file] [log] [blame]
Dennis Dalessandro01946212016-01-06 09:50:24 -08001#ifndef DEF_RDMA_VT_H
2#define DEF_RDMA_VT_H
3
4/*
5 * Copyright(c) 2015 Intel Corporation.
6 *
7 * This file is provided under a dual BSD/GPLv2 license. When using or
8 * redistributing this file, you may do so under either license.
9 *
10 * GPL LICENSE SUMMARY
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of version 2 of the GNU General Public License as
14 * published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * BSD LICENSE
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 *
27 * - Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * - Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in
31 * the documentation and/or other materials provided with the
32 * distribution.
33 * - Neither the name of Intel Corporation nor the names of its
34 * contributors may be used to endorse or promote products derived
35 * from this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 */
50
51/*
52 * Structure that low level drivers will populate in order to register with the
53 * rdmavt layer.
54 */
55
56#include "ib_verbs.h"
Dennis Dalessandro8afd32e2016-01-06 09:51:48 -080057
58/*
Dennis Dalessandrob92a7562016-01-06 10:01:42 -080059 * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once
60 * drivers no longer need access to the MR directly.
61 */
62
/*
 * A segment is a linear region of low physical memory.
 * Used by the verbs layer.
 *
 * Segments are grouped into page-sized arrays (struct rvt_segarray).
 */
struct rvt_seg {
	void *vaddr;	/* start address of the segment */
	size_t length;	/* length of the segment */
};
71
/*
 * The number of rvt_segs that fit in a page. Used as the element count of
 * struct rvt_segarray.
 */
#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg))
74
/*
 * A fixed-size batch of RVT_SEGSZ segments. struct rvt_mregion refers to
 * these through its map[] array of pointers.
 */
struct rvt_segarray {
	struct rvt_seg segs[RVT_SEGSZ];
};
78
79struct rvt_mregion {
80 struct ib_pd *pd; /* shares refcnt of ibmr.pd */
81 u64 user_base; /* User's address for this region */
82 u64 iova; /* IB start address of this region */
83 size_t length;
84 u32 lkey;
85 u32 offset; /* offset (bytes) to start of region */
86 int access_flags;
87 u32 max_segs; /* number of rvt_segs in all the arrays */
88 u32 mapsz; /* size of the map array */
89 u8 page_shift; /* 0 - non unform/non powerof2 sizes */
90 u8 lkey_published; /* in global table */
91 struct completion comp; /* complete when refcount goes to zero */
92 atomic_t refcount;
93 struct rvt_segarray *map[0]; /* the segments */
94};
95
/*
 * NOTE(review): presumably the upper bound on log2 of the lkey table size
 * (see struct rvt_lkey_table) -- no user is visible in this header; confirm.
 */
#define RVT_MAX_LKEY_TABLE_BITS 23
97
/*
 * Table of memory regions; one instance is embedded in struct rvt_dev_info.
 * The table pointer carries the __rcu annotation.
 */
struct rvt_lkey_table {
	spinlock_t lock;	/* protect changes in this struct */
	u32 next;		/* next unused index (speeds search) */
	u32 gen;		/* generation count */
	u32 max;		/* size of the table */
	struct rvt_mregion __rcu **table;
};
105
/* End Memory Region */
107
/*
 * Things needed for the Queue Pair definition. Like the MR stuff above, the
 * following should probably get moved to qp.h once drivers stop trying to make
 * and manipulate their own QPs. For the few instances where a driver may need
 * to look into a queue pair, there should be a pointer to a driver private
 * data structure that they can look at.
 */
115
/*
 * These keep track of the copy progress within a memory region.
 * Used by the verbs layer.
 *
 * m and n together locate the current segment as mr->map[m]->segs[n].
 */
struct rvt_sge {
	struct rvt_mregion *mr;
	void *vaddr;		/* kernel virtual address of segment */
	u32 sge_length;		/* length of the SGE */
	u32 length;		/* remaining length of the segment */
	u16 m;			/* current index: mr->map[m] */
	u16 n;			/* current index: mr->map[m]->segs[n] */
};
128
129/*
130 * Send work request queue entry.
131 * The size of the sg_list is determined when the QP is created and stored
132 * in qp->s_max_sge.
133 */
134struct rvt_swqe {
135 union {
136 struct ib_send_wr wr; /* don't use wr.sg_list */
137 struct ib_ud_wr ud_wr;
138 struct ib_reg_wr reg_wr;
139 struct ib_rdma_wr rdma_wr;
140 struct ib_atomic_wr atomic_wr;
141 };
142 u32 psn; /* first packet sequence number */
143 u32 lpsn; /* last packet sequence number */
144 u32 ssn; /* send sequence number */
145 u32 length; /* total length of data in sg_list */
146 struct rvt_sge sg_list[0];
147};
148
149/*
150 * Receive work request queue entry.
151 * The size of the sg_list is determined when the QP (or SRQ) is created
152 * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
153 */
154struct rvt_rwqe {
155 u64 wr_id;
156 u8 num_sge;
157 struct ib_sge sg_list[0];
158};
159
160/*
161 * This structure is used to contain the head pointer, tail pointer,
162 * and receive work queue entries as a single memory allocation so
163 * it can be mmap'ed into user space.
164 * Note that the wq array elements are variable size so you can't
165 * just index into the array to get the N'th element;
166 * use get_rwqe_ptr() instead.
167 */
168struct rvt_rwq {
169 u32 head; /* new work requests posted to the head */
170 u32 tail; /* receives pull requests from here. */
171 struct rvt_rwqe wq[0];
172};
173
/*
 * Receive queue: a pointer to the head/tail/entries allocation (struct
 * rvt_rwq) plus its dimensions. Embedded in struct rvt_qp as r_rq.
 */
struct rvt_rq {
	struct rvt_rwq *wq;
	u32 size;		/* size of RWQE array */
	u8 max_sge;		/* sizes each RWQE's sg_list (see struct rvt_rwqe) */
	/* protect changes in this struct */
	spinlock_t lock ____cacheline_aligned_in_smp;
};
181
/*
 * This structure is used by rvt_mmap() to validate an offset
 * when an mmap() request is made. The vm_area_struct then uses
 * this as its vm_private_data.
 *
 * NOTE(review): field roles below are inferred from names and types only;
 * rvt_mmap() is not visible in this header -- confirm.
 */
struct rvt_mmap_info {
	struct list_head pending_mmaps;	/* list linkage */
	struct ib_ucontext *context;
	void *obj;			/* presumably the memory to be mapped */
	__u64 offset;			/* offset that mmap() requests are checked against */
	struct kref ref;		/* reference count */
	unsigned size;
};
195
/* Sizes rvt_qp.s_ack_queue[], which holds RVT_MAX_RDMA_ATOMIC + 1 entries. */
#define RVT_MAX_RDMA_ATOMIC 16
197
/*
 * This structure holds the information that the send tasklet needs
 * to send a RDMA read response or atomic operation.
 */
struct rvt_ack_entry {
	u8 opcode;
	u8 sent;
	u32 psn;	/* presumably first PSN, as in struct rvt_swqe -- confirm */
	u32 lpsn;	/* presumably last PSN, as in struct rvt_swqe -- confirm */
	/*
	 * The two members share storage.
	 * NOTE(review): which one is valid presumably depends on opcode
	 * (RDMA read vs. atomic) -- confirm against the users.
	 */
	union {
		struct rvt_sge rdma_sge;
		u64 atomic_data;
	};
};
212
/*
 * Position within a list of SGEs: the SGE currently being worked on is held
 * by value in sge, and sg_list points at the next one to use.
 */
struct rvt_sge_state {
	struct rvt_sge *sg_list;	/* next SGE to be used if any */
	struct rvt_sge sge;		/* progress state for the current SGE */
	u32 total_len;
	u8 num_sge;
};
219
220/*
221 * Variables prefixed with s_ are for the requester (sender).
222 * Variables prefixed with r_ are for the responder (receiver).
223 * Variables prefixed with ack_ are for responder replies.
224 *
225 * Common variables are protected by both r_rq.lock and s_lock in that order
226 * which only happens in modify_qp() or changing the QP 'state'.
227 */
228struct rvt_qp {
229 struct ib_qp ibqp;
230 void *priv; /* Driver private data */
231 /* read mostly fields above and below */
232 struct ib_ah_attr remote_ah_attr;
233 struct ib_ah_attr alt_ah_attr;
234 struct rvt_qp __rcu *next; /* link list for QPN hash table */
235 struct rvt_swqe *s_wq; /* send work queue */
236 struct rvt_mmap_info *ip;
237
238 unsigned long timeout_jiffies; /* computed from timeout */
239
240 enum ib_mtu path_mtu;
241 int srate_mbps; /* s_srate (below) converted to Mbit/s */
242 u32 remote_qpn;
243 u32 pmtu; /* decoded from path_mtu */
244 u32 qkey; /* QKEY for this QP (for UD or RD) */
245 u32 s_size; /* send work queue size */
246 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
247 u32 s_ahgpsn; /* set to the psn in the copy of the header */
248
249 u8 state; /* QP state */
250 u8 allowed_ops; /* high order bits of allowed opcodes */
251 u8 qp_access_flags;
252 u8 alt_timeout; /* Alternate path timeout for this QP */
253 u8 timeout; /* Timeout for this QP */
254 u8 s_srate;
255 u8 s_mig_state;
256 u8 port_num;
257 u8 s_pkey_index; /* PKEY index to use */
258 u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
259 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
260 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
261 u8 s_retry_cnt; /* number of times to retry */
262 u8 s_rnr_retry_cnt;
263 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
264 u8 s_max_sge; /* size of s_wq->sg_list */
265 u8 s_draining;
266
267 /* start of read/write fields */
268 atomic_t refcount ____cacheline_aligned_in_smp;
269 wait_queue_head_t wait;
270
271 struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1]
272 ____cacheline_aligned_in_smp;
273 struct rvt_sge_state s_rdma_read_sge;
274
275 spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */
276 unsigned long r_aflags;
277 u64 r_wr_id; /* ID for current receive WQE */
278 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
279 u32 r_len; /* total length of r_sge */
280 u32 r_rcv_len; /* receive data len processed */
281 u32 r_psn; /* expected rcv packet sequence number */
282 u32 r_msn; /* message sequence number */
283
284 u8 r_state; /* opcode of last packet received */
285 u8 r_flags;
286 u8 r_head_ack_queue; /* index into s_ack_queue[] */
287
288 struct list_head rspwait; /* link for waiting to respond */
289
290 struct rvt_sge_state r_sge; /* current receive data */
291 struct rvt_rq r_rq; /* receive work queue */
292
293 spinlock_t s_lock ____cacheline_aligned_in_smp;
294 struct rvt_sge_state *s_cur_sge;
295 u32 s_flags;
296 struct rvt_swqe *s_wqe;
297 struct rvt_sge_state s_sge; /* current send request data */
298 struct rvt_mregion *s_rdma_mr;
299 struct sdma_engine *s_sde; /* current sde */
300 u32 s_cur_size; /* size of send packet in bytes */
301 u32 s_len; /* total length of s_sge */
302 u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
303 u32 s_next_psn; /* PSN for next request */
304 u32 s_last_psn; /* last response PSN processed */
305 u32 s_sending_psn; /* lowest PSN that is being sent */
306 u32 s_sending_hpsn; /* highest PSN that is being sent */
307 u32 s_psn; /* current packet sequence number */
308 u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
309 u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
310 u32 s_head; /* new entries added here */
311 u32 s_tail; /* next entry to process */
312 u32 s_cur; /* current work queue entry */
313 u32 s_acked; /* last un-ACK'ed entry */
314 u32 s_last; /* last completed entry */
315 u32 s_ssn; /* SSN of tail entry */
316 u32 s_lsn; /* limit sequence number (credit) */
317 u16 s_hdrwords; /* size of s_hdr in 32 bit words */
318 u16 s_rdma_ack_cnt;
319 s8 s_ahgidx;
320 u8 s_state; /* opcode of last packet sent */
321 u8 s_ack_state; /* opcode of packet to ACK */
322 u8 s_nak_state; /* non-zero if NAK is pending */
323 u8 r_nak_state; /* non-zero if NAK is pending */
324 u8 s_retry; /* requester retry counter */
325 u8 s_rnr_retry; /* requester RNR retry counter */
326 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
327 u8 s_tail_ack_queue; /* index into s_ack_queue[] */
328
329 struct rvt_sge_state s_ack_rdma_sge;
330 struct timer_list s_timer;
331
332 /*
333 * This sge list MUST be last. Do not add anything below here.
334 */
335 struct rvt_sge r_sg_list[0] /* verified SGEs */
336 ____cacheline_aligned_in_smp;
337};
338
339/* End QP section */
340
/*
 * Things that are driver specific, module parameters in hfi1 and qib
 */
struct rvt_driver_params {
	/*
	 * driver required fields:
	 *	node_guid
	 *	phys_port_cnt
	 *	dma_device
	 *	owner
	 * driver optional fields (rvt will provide generic value if blank):
	 *	name
	 *	node_desc
	 * rvt fields, driver value ignored:
	 *	uverbs_abi_ver
	 *	node_type
	 *	num_comp_vectors
	 *	uverbs_cmd_mask
	 */
	struct ib_device_attr props;

	/*
	 * Drivers will need to support a number of notifications to rvt in
	 * accordance with certain events. This structure should contain a mask
	 * of the supported events. Such events that the rvt may need to know
	 * about include:
	 *	port errors
	 *	port active
	 *	lid change
	 *	sm change
	 *	client reregister
	 *	pkey change
	 *
	 * This is not an exhaustive list; there may be other events that the
	 * rvt layer needs to know about. For some events, such as completion
	 * queue errors, rvt does not need to rely on the driver.
	 */
	int rvt_signal_supported;

	/*
	 * Anything driver specific that is not covered by props, for instance
	 * special module parameters, goes here.
	 */
};
385
/*
 * Protection domain. Wraps the core struct ib_pd; use ibpd_to_rvtpd() to get
 * from the embedded ib_pd back to this structure.
 */
struct rvt_pd {
	struct ib_pd ibpd;
	int user;	/* non-zero if created from user space */
};
391
/*
 * Per-device structure that a low level driver populates and passes to
 * rvt_register_device(). It wraps the core struct ib_device; use ib_to_rvt()
 * to get from the embedded ib_device back to this structure.
 */
struct rvt_dev_info {
	/*
	 * Prior to calling for registration the driver will be responsible for
	 * allocating space for this structure.
	 *
	 * The driver will also be responsible for filling in certain members of
	 * dparms.props
	 */
	struct ib_device ibdev;

	/* Driver specific properties */
	struct rvt_driver_params dparms;

	struct rvt_mregion __rcu *dma_mr;
	struct rvt_lkey_table lkey_table;

	/* PKey Table goes here */

	/*
	 * The work to create port files in /sys/class/infiniband is different
	 * depending on the driver. This should not be abstracted away;
	 * instead, drivers are responsible for setting the correct callback
	 * for this.
	 */
	int (*port_callback)(struct ib_device *, u8, struct kobject *);

	/* Internal use */
	int n_pds_allocated;
	spinlock_t n_pds_lock;	/* Protect pd allocated count */
};
422
Dennis Dalessandro8afd32e2016-01-06 09:51:48 -0800423static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
424{
425 return container_of(ibpd, struct rvt_pd, ibpd);
426}
427
428static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev)
429{
430 return container_of(ibdev, struct rvt_dev_info, ibdev);
431}
432
/*
 * Registration entry points for low level drivers. The driver populates a
 * struct rvt_dev_info and passes it to rvt_register_device();
 * rvt_unregister_device() takes the same structure. The implementations are
 * not part of this header.
 * NOTE(review): the int return is presumably 0 on success and a negative
 * errno on failure -- confirm against the definition.
 */
int rvt_register_device(struct rvt_dev_info *rvd);
void rvt_unregister_device(struct rvt_dev_info *rvd);
435
436#endif /* DEF_RDMA_VT_H */