/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 *
 * Copyright 2016-2018 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */
7
#ifndef HABANALABS_H_
#define HABANALABS_H_

#include <linux/types.h>
#include <linux/ioctl.h>

/*
 * Defines that are asic-specific but constitute the ABI between the kernel
 * driver and userspace
 */

/* SRAM region reserved for the kernel driver, measured from SRAM start */
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START	0x8000	/* 32KB */
19
/*
 * Queue Numbering
 *
 * The external queues (DMA channels + CPU) MUST be before the internal queues
 * and each group (DMA channels + CPU and internal) must be contiguous inside
 * itself but there can be a gap between the two groups (although not
 * recommended)
 *
 * NOTE: this enum is userspace ABI - existing values must never be renumbered.
 */

enum goya_queue_id {
	GOYA_QUEUE_ID_DMA_0 = 0,
	GOYA_QUEUE_ID_DMA_1,
	GOYA_QUEUE_ID_DMA_2,
	GOYA_QUEUE_ID_DMA_3,
	GOYA_QUEUE_ID_DMA_4,
	GOYA_QUEUE_ID_CPU_PQ,
	GOYA_QUEUE_ID_MME,	/* Internal queues start here */
	GOYA_QUEUE_ID_TPC0,
	GOYA_QUEUE_ID_TPC1,
	GOYA_QUEUE_ID_TPC2,
	GOYA_QUEUE_ID_TPC3,
	GOYA_QUEUE_ID_TPC4,
	GOYA_QUEUE_ID_TPC5,
	GOYA_QUEUE_ID_TPC6,
	GOYA_QUEUE_ID_TPC7,
	GOYA_QUEUE_ID_SIZE	/* Number of queue IDs, not a real queue */
};
47
/* Opcodes for the management (INFO) ioctl - placed in hl_info_args.op */
#define HL_INFO_HW_IP_INFO	0	/* returns struct hl_info_hw_ip_info */
#define HL_INFO_HW_EVENTS	1	/* returns array of H/W event counters */
#define HL_INFO_DRAM_USAGE	2	/* returns struct hl_info_dram_usage */
#define HL_INFO_HW_IDLE		3	/* returns struct hl_info_hw_idle */

/* Maximum length, in bytes, of the ArmCP version string */
#define HL_INFO_VERSION_MAX_LEN	128
55
/* H/W IP information, returned for the HL_INFO_HW_IP_INFO opcode */
struct hl_info_hw_ip_info {
	__u64 sram_base_address;	/* Base address of device SRAM */
	__u64 dram_base_address;	/* Base address of device DRAM */
	__u64 dram_size;		/* Size of device DRAM */
	__u32 sram_size;		/* Size of device SRAM */
	__u32 num_of_events;	/* Number of possible H/W events (sizes the
				 * HL_INFO_HW_EVENTS return array)
				 */
	__u32 device_id;	/* PCI Device ID */
	__u32 reserved[3];	/* Reserved for future use */
	__u32 armcp_cpld_version;	/* CPLD version reported by ArmCP */
	/* PSOC PCI PLL configuration values */
	__u32 psoc_pci_pll_nr;
	__u32 psoc_pci_pll_nf;
	__u32 psoc_pci_pll_od;
	__u32 psoc_pci_pll_div_factor;
	__u8 tpc_enabled_mask;	/* Bit-mask of enabled TPC engines */
	__u8 dram_enabled;	/* Non-zero if device DRAM is available */
	__u8 pad[2];		/* Explicit padding for 4-byte alignment */
	__u8 armcp_version[HL_INFO_VERSION_MAX_LEN];	/* ArmCP F/W version */
};
74
/* DRAM usage, returned for the HL_INFO_DRAM_USAGE opcode */
struct hl_info_dram_usage {
	__u64 dram_free_mem;	/* Total free DRAM on the device */
	__u64 ctx_dram_mem;	/* DRAM allocated by the calling context */
};
79
/* Engines idle state, returned for the HL_INFO_HW_IDLE opcode */
struct hl_info_hw_idle {
	__u32 is_idle;	/* Non-zero if the device engines are idle */
	__u32 pad;	/* Explicit padding for 8-byte alignment */
};
84
/* Input arguments for the HL_IOCTL_INFO ioctl */
struct hl_info_args {
	/* Location of relevant struct in userspace */
	__u64 return_pointer;
	/*
	 * The size of the return value. Just like "size" in "snprintf",
	 * it limits how many bytes the kernel can write
	 *
	 * For hw_events array, the size should be
	 * hl_info_hw_ip_info.num_of_events * sizeof(__u32)
	 */
	__u32 return_size;

	/* HL_INFO_* */
	__u32 op;

	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;	/* Explicit padding for 8-byte alignment */
};
Oded Gabbay9494a8d2019-02-16 00:39:17 +0200104
/* Opcode to create a new command buffer */
#define HL_CB_OP_CREATE		0
/* Opcode to destroy previously created command buffer */
#define HL_CB_OP_DESTROY	1
109
/* Input arguments for the HL_IOCTL_CB ioctl */
struct hl_cb_in {
	/* Handle of CB or 0 if we want to create one */
	__u64 cb_handle;
	/* HL_CB_OP_* */
	__u32 op;
	/* Size of CB. Maximum size is 2MB. The minimum size that will be
	 * allocated, regardless of this parameter's value, is PAGE_SIZE
	 */
	__u32 cb_size;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;	/* Explicit padding for 8-byte alignment */
};
123
/* Output of the HL_IOCTL_CB ioctl for the CREATE opcode */
struct hl_cb_out {
	/* Handle of CB */
	__u64 cb_handle;
};
128
/* Argument union for the HL_IOCTL_CB ioctl (in on entry, out on return) */
union hl_cb_args {
	struct hl_cb_in in;
	struct hl_cb_out out;
};
133
/*
 * Descriptor of a single JOB inside a command submission.
 *
 * This structure size must always be fixed to 64-bytes for backward
 * compatibility
 */
struct hl_cs_chunk {
	/*
	 * For external queue, this represents a Handle of CB on the Host
	 * For internal queue, this represents an SRAM or DRAM address of the
	 * internal CB
	 */
	__u64 cb_handle;
	/* Index of queue to put the CB on */
	__u32 queue_index;
	/*
	 * Size of command buffer with valid packets
	 * Can be smaller than actual CB size
	 */
	__u32 cb_size;
	/* HL_CS_CHUNK_FLAGS_* */
	__u32 cs_chunk_flags;
	/* Align structure to 64 bytes */
	__u32 pad[11];
};
157
/* Force the driver to run the "restore" phase even without context switch */
#define HL_CS_FLAGS_FORCE_RESTORE	0x1

/* Status value returned in hl_cs_out.status on successful enqueue */
#define HL_CS_STATUS_SUCCESS		0
161
/* Input arguments for the HL_IOCTL_CS (command submission) ioctl */
struct hl_cs_in {
	/* this holds address of array of hl_cs_chunk for restore phase */
	__u64 chunks_restore;
	/* this holds address of array of hl_cs_chunk for execution phase */
	__u64 chunks_execute;
	/* this holds address of array of hl_cs_chunk for store phase -
	 * Currently not in use
	 */
	__u64 chunks_store;
	/* Number of chunks in restore phase array */
	__u32 num_chunks_restore;
	/* Number of chunks in execution array */
	__u32 num_chunks_execute;
	/* Number of chunks in store phase array - Currently not in use */
	__u32 num_chunks_store;
	/* HL_CS_FLAGS_* */
	__u32 cs_flags;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
};
182
/* Output of the HL_IOCTL_CS ioctl */
struct hl_cs_out {
	/*
	 * seq holds the sequence number of the CS to pass to wait ioctl. All
	 * values are valid except for 0 and ULLONG_MAX
	 */
	__u64 seq;
	/* HL_CS_STATUS_* */
	__u32 status;
	__u32 pad;	/* Explicit padding for 8-byte alignment */
};
193
/* Argument union for the HL_IOCTL_CS ioctl (in on entry, out on return) */
union hl_cs_args {
	struct hl_cs_in in;
	struct hl_cs_out out;
};
198
/* Input arguments for the HL_IOCTL_WAIT_CS ioctl */
struct hl_wait_cs_in {
	/* Command submission sequence number */
	__u64 seq;
	/* Absolute timeout to wait in microseconds */
	__u64 timeout_us;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;	/* Explicit padding for 8-byte alignment */
};
208
/* Status values returned in hl_wait_cs_out.status */
#define HL_WAIT_CS_STATUS_COMPLETED	0	/* CS finished successfully */
#define HL_WAIT_CS_STATUS_BUSY		1	/* CS still executing */
#define HL_WAIT_CS_STATUS_TIMEDOUT	2	/* CS caused a device timeout */
#define HL_WAIT_CS_STATUS_ABORTED	3	/* CS aborted (e.g. device reset) */
#define HL_WAIT_CS_STATUS_INTERRUPTED	4	/* Wait interrupted by signal */
214
/* Output of the HL_IOCTL_WAIT_CS ioctl */
struct hl_wait_cs_out {
	/* HL_WAIT_CS_STATUS_* */
	__u32 status;
	__u32 pad;	/* Explicit padding for 8-byte alignment */
};
220
/* Argument union for the HL_IOCTL_WAIT_CS ioctl (in on entry, out on return) */
union hl_wait_cs_args {
	struct hl_wait_cs_in in;
	struct hl_wait_cs_out out;
};
225
/* Opcode to alloc device memory */
#define HL_MEM_OP_ALLOC		0
/* Opcode to free previously allocated device memory */
#define HL_MEM_OP_FREE		1
/* Opcode to map host memory */
#define HL_MEM_OP_MAP		2
/* Opcode to unmap previously mapped host memory */
#define HL_MEM_OP_UNMAP		3

/* Memory flags - placed in hl_mem_in.flags */
#define HL_MEM_CONTIGUOUS	0x1	/* Allocate physically contiguous memory */
#define HL_MEM_SHARED		0x2	/* Allocation may be shared between contexts */
#define HL_MEM_USERPTR		0x4	/* Operation refers to host (user) memory */
239
/*
 * Input arguments for the HL_IOCTL_MEMORY ioctl.
 * The op field selects which member of the anonymous union is used.
 */
struct hl_mem_in {
	union {
		/* HL_MEM_OP_ALLOC - allocate device memory */
		struct {
			/* Size to alloc */
			__u64 mem_size;
		} alloc;

		/* HL_MEM_OP_FREE - free device memory */
		struct {
			/* Handle returned from HL_MEM_OP_ALLOC */
			__u64 handle;
		} free;

		/* HL_MEM_OP_MAP - map device memory */
		struct {
			/*
			 * Requested virtual address of mapped memory.
			 * KMD will try to map the requested region to this
			 * hint address, as long as the address is valid and
			 * not already mapped. The user should check the
			 * returned address of the IOCTL to make sure he got
			 * the hint address. Passing 0 here means that KMD
			 * will choose the address itself.
			 */
			__u64 hint_addr;
			/* Handle returned from HL_MEM_OP_ALLOC */
			__u64 handle;
		} map_device;

		/* HL_MEM_OP_MAP - map host memory (with HL_MEM_USERPTR) */
		struct {
			/* Address of allocated host memory */
			__u64 host_virt_addr;
			/*
			 * Requested virtual address of mapped memory.
			 * KMD will try to map the requested region to this
			 * hint address, as long as the address is valid and
			 * not already mapped. The user should check the
			 * returned address of the IOCTL to make sure he got
			 * the hint address. Passing 0 here means that KMD
			 * will choose the address itself.
			 */
			__u64 hint_addr;
			/* Size of allocated host memory */
			__u64 mem_size;
		} map_host;

		/* HL_MEM_OP_UNMAP - unmap host memory */
		struct {
			/* Virtual address returned from HL_MEM_OP_MAP */
			__u64 device_virt_addr;
		} unmap;
	};

	/* HL_MEM_OP_* */
	__u32 op;
	/* HL_MEM_* flags */
	__u32 flags;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;	/* Explicit padding for 8-byte alignment */
};
303
/* Output of the HL_IOCTL_MEMORY ioctl; which member is valid depends on op */
struct hl_mem_out {
	union {
		/*
		 * Used for HL_MEM_OP_MAP as the virtual address that was
		 * assigned in the device VA space.
		 * A value of 0 means the requested operation failed.
		 */
		__u64 device_virt_addr;

		/*
		 * Used for HL_MEM_OP_ALLOC. This is the assigned
		 * handle for the allocated memory
		 */
		__u64 handle;
	};
};
320
/* Argument union for the HL_IOCTL_MEMORY ioctl (in on entry, out on return) */
union hl_mem_args {
	struct hl_mem_in in;
	struct hl_mem_out out;
};
325
/*
 * Various information operations such as:
 * - H/W IP information
 * - Current dram usage
 *
 * The user calls this IOCTL with an opcode that describes the required
 * information. The user should supply a pointer to a user-allocated memory
 * chunk, which will be filled by the driver with the requested information.
 *
 * The user supplies the maximum amount of size to copy into the user's memory,
 * in order to prevent data corruption in case of differences between the
 * definitions of structures in kernel and userspace, e.g. in case of old
 * userspace and new kernel driver
 */
#define HL_IOCTL_INFO	\
		_IOWR('H', 0x01, struct hl_info_args)
342
/*
 * Command Buffer
 * - Request a Command Buffer
 * - Destroy a Command Buffer
 *
 * The command buffers are memory blocks that reside in DMA-able address
 * space and are physically contiguous so they can be accessed by the device
 * directly. They are allocated using the coherent DMA API.
 *
 * When creating a new CB, the IOCTL returns a handle of it, and the user-space
 * process needs to use that handle to mmap the buffer so it can access them.
 *
 */
#define HL_IOCTL_CB		\
		_IOWR('H', 0x02, union hl_cb_args)
358
/*
 * Command Submission
 *
 * To submit work to the device, the user needs to call this IOCTL with a set
 * of JOBS. That set of JOBS constitutes a CS object.
 * Each JOB will be enqueued on a specific queue, according to the user's input.
 * There can be more than one JOB per queue.
 *
 * There are two types of queues - external and internal. External queues
 * are DMA queues which transfer data from/to the Host. All other queues are
 * internal. The driver will get completion notifications from the device only
 * on JOBS which are enqueued in the external queues.
 *
 * For jobs on external queues, the user needs to create command buffers
 * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on
 * internal queues, the user needs to prepare a "command buffer" with packets
 * on either the SRAM or DRAM, and give the device address of that buffer to
 * the CS ioctl.
 *
 * This IOCTL is asynchronous in regard to the actual execution of the CS. This
 * means it returns immediately after ALL the JOBS were enqueued on their
 * relevant queues. Therefore, the user mustn't assume the CS has been completed
 * or has even started to execute.
 *
 * Upon successful enqueue, the IOCTL returns an opaque handle which the user
 * can use with the "Wait for CS" IOCTL to check whether the handle's CS
 * external JOBS have been completed. Note that if the CS has internal JOBS
 * which can execute AFTER the external JOBS have finished, the driver might
 * report that the CS has finished executing BEFORE the internal JOBS have
 * actually finished executing.
 *
 * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
 * a second set is for "execution" phase and a third set is for "store" phase.
 * The JOBS on the "restore" phase are enqueued only after context-switch
 * (or if it's the first CS for this context). The user can also order the
 * driver to run the "restore" phase explicitly
 *
 */
#define HL_IOCTL_CS			\
		_IOWR('H', 0x03, union hl_cs_args)
399
/*
 * Wait for Command Submission
 *
 * The user can call this IOCTL with a handle it received from the CS IOCTL
 * to wait until the handle's CS has finished executing. The user will wait
 * inside the kernel until the CS has finished or until the user-requested
 * timeout has expired.
 *
 * The return value of the IOCTL is a standard Linux error code. The possible
 * values are:
 *
 * EINTR     - Kernel waiting has been interrupted, e.g. due to OS signal
 *             that the user process received
 * ETIMEDOUT - The CS has caused a timeout on the device
 * EIO       - The CS was aborted (usually because the device was reset)
 * ENODEV    - The device wants to do hard-reset (so user need to close FD)
 *
 * The driver also returns a custom define inside the IOCTL which can be:
 *
 * HL_WAIT_CS_STATUS_COMPLETED   - The CS has been completed successfully (0)
 * HL_WAIT_CS_STATUS_BUSY        - The CS is still executing (0)
 * HL_WAIT_CS_STATUS_TIMEDOUT    - The CS has caused a timeout on the device
 *                                 (ETIMEDOUT)
 * HL_WAIT_CS_STATUS_ABORTED     - The CS was aborted, usually because the
 *                                 device was reset (EIO)
 * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR)
 *
 */

#define HL_IOCTL_WAIT_CS		\
		_IOWR('H', 0x04, union hl_wait_cs_args)
431
/*
 * Memory
 * - Map host memory to device MMU
 * - Unmap host memory from device MMU
 *
 * This IOCTL allows the user to map host memory to the device MMU
 *
 * For host memory, the IOCTL doesn't allocate memory. The user is supposed
 * to allocate the memory in user-space (malloc/new). The driver pins the
 * physical pages (up to the allowed limit by the OS), assigns a virtual
 * address in the device VA space and initializes the device MMU.
 *
 * There is an option for the user to specify the requested virtual address.
 *
 */
#define HL_IOCTL_MEMORY		\
		_IOWR('H', 0x05, union hl_mem_args)
449
/* First and one-past-last ioctl command numbers used by this driver */
#define HL_COMMAND_START	0x01
#define HL_COMMAND_END		0x06
Oded Gabbaybe5d9262019-02-16 00:39:15 +0200452
Oded Gabbay99b9d7b2019-02-16 00:39:13 +0200453#endif /* HABANALABS_H_ */