/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 *
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */
#ifndef HABANALABS_H_
#define HABANALABS_H_

#include <linux/types.h>
#include <linux/ioctl.h>
/*
 * Defines that are ASIC-specific but constitute ABI between the kernel driver
 * and userspace
 */
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START	0x8000	/* 32KB */
/*
 * Queue Numbering
 *
 * The external queues (PCI DMA channels) MUST be before the internal queues
 * and each group (PCI DMA channels and internal) must be contiguous inside
 * itself but there can be a gap between the two groups (although not
 * recommended)
 */

enum goya_queue_id {
	GOYA_QUEUE_ID_DMA_0 = 0,
	GOYA_QUEUE_ID_DMA_1 = 1,
	GOYA_QUEUE_ID_DMA_2 = 2,
	GOYA_QUEUE_ID_DMA_3 = 3,
	GOYA_QUEUE_ID_DMA_4 = 4,
	GOYA_QUEUE_ID_CPU_PQ = 5,
	GOYA_QUEUE_ID_MME = 6,	/* Internal queues start here */
	GOYA_QUEUE_ID_TPC0 = 7,
	GOYA_QUEUE_ID_TPC1 = 8,
	GOYA_QUEUE_ID_TPC2 = 9,
	GOYA_QUEUE_ID_TPC3 = 10,
	GOYA_QUEUE_ID_TPC4 = 11,
	GOYA_QUEUE_ID_TPC5 = 12,
	GOYA_QUEUE_ID_TPC6 = 13,
	GOYA_QUEUE_ID_TPC7 = 14,
	GOYA_QUEUE_ID_SIZE	/* Number of queue IDs; keep last */
};
/*
 * Engine Numbering
 *
 * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle'
 */

enum goya_engine_id {
	GOYA_ENGINE_ID_DMA_0 = 0,
	GOYA_ENGINE_ID_DMA_1,
	GOYA_ENGINE_ID_DMA_2,
	GOYA_ENGINE_ID_DMA_3,
	GOYA_ENGINE_ID_DMA_4,
	GOYA_ENGINE_ID_MME_0,
	GOYA_ENGINE_ID_TPC_0,
	GOYA_ENGINE_ID_TPC_1,
	GOYA_ENGINE_ID_TPC_2,
	GOYA_ENGINE_ID_TPC_3,
	GOYA_ENGINE_ID_TPC_4,
	GOYA_ENGINE_ID_TPC_5,
	GOYA_ENGINE_ID_TPC_6,
	GOYA_ENGINE_ID_TPC_7,
	GOYA_ENGINE_ID_SIZE	/* Number of engine IDs; keep last */
};
/*
 * Device status values.
 * NOTE(review): presumably the values reported in
 * `struct hl_info_device_status' for HL_INFO_DEVICE_STATUS - confirm.
 */
enum hl_device_status {
	HL_DEVICE_STATUS_OPERATIONAL,
	HL_DEVICE_STATUS_IN_RESET,
	HL_DEVICE_STATUS_MALFUNCTION
};
/* Opcode for management ioctl
 *
 * HL_INFO_HW_IP_INFO    - Receive information about different IP blocks in the
 *                         device.
 * HL_INFO_HW_EVENTS     - Receive an array describing how many times each event
 *                         occurred since the last hard reset.
 * HL_INFO_DRAM_USAGE    - Retrieve the dram usage inside the device and of the
 *                         specific context. This is relevant only for devices
 *                         where the dram is managed by the kernel driver
 * HL_INFO_HW_IDLE       - Retrieve information about the idle status of each
 *                         internal engine.
 * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't
 *                         require an open context.
 * HL_INFO_DEVICE_UTILIZATION  - Retrieve the total utilization of the device
 *                               over the last period specified by the user.
 *                               The period can be between 100ms to 1s, in
 *                               resolution of 100ms. The return value is a
 *                               percentage of the utilization rate.
 * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each
 *                               event occurred since the driver was loaded.
 * HL_INFO_CLK_RATE            - Retrieve the current and maximum clock rate
 *                               of the device in MHz. The maximum clock rate is
 *                               configurable via sysfs parameter
 * HL_INFO_RESET_COUNT   - Retrieve the counts of the soft and hard reset
 *                         operations performed on the device since the last
 *                         time the driver was loaded.
 * HL_INFO_TIME_SYNC     - Retrieve the device's time alongside the host's time
 *                         for synchronization.
 *
 * Note: value 5 is not assigned to any opcode in this list. These values are
 * passed in hl_info_args.op, so existing values must not be renumbered.
 */
#define HL_INFO_HW_IP_INFO		0
#define HL_INFO_HW_EVENTS		1
#define HL_INFO_DRAM_USAGE		2
#define HL_INFO_HW_IDLE			3
#define HL_INFO_DEVICE_STATUS		4
#define HL_INFO_DEVICE_UTILIZATION	6
#define HL_INFO_HW_EVENTS_AGGREGATE	7
#define HL_INFO_CLK_RATE		8
#define HL_INFO_RESET_COUNT		9
#define HL_INFO_TIME_SYNC		10
/* Sizes, in bytes, of the fixed-length byte arrays in hl_info_hw_ip_info */
#define HL_INFO_VERSION_MAX_LEN		128
#define HL_INFO_CARD_NAME_MAX_LEN	16

/*
 * Information about the device's IP blocks.
 * NOTE(review): presumably the structure filled for HL_INFO_HW_IP_INFO -
 * confirm against the driver.
 *
 * All members are naturally aligned and the explicit pad keeps the layout
 * free of compiler-inserted holes.
 */
struct hl_info_hw_ip_info {
	__u64 sram_base_address;
	__u64 dram_base_address;
	__u64 dram_size;
	__u32 sram_size;
	__u32 num_of_events;
	__u32 device_id; /* PCI Device ID */
	__u32 reserved[3];
	__u32 armcp_cpld_version;
	__u32 psoc_pci_pll_nr;
	__u32 psoc_pci_pll_nf;
	__u32 psoc_pci_pll_od;
	__u32 psoc_pci_pll_div_factor;
	__u8 tpc_enabled_mask;
	__u8 dram_enabled;
	__u8 pad[2];
	__u8 armcp_version[HL_INFO_VERSION_MAX_LEN];
	__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
};
/*
 * DRAM usage figures.
 * NOTE(review): presumably the HL_INFO_DRAM_USAGE result (device-wide free
 * memory and the calling context's usage) - confirm.
 */
struct hl_info_dram_usage {
	__u64 dram_free_mem;
	__u64 ctx_dram_mem;
};
/* Idle status of the device and of its individual engines */
struct hl_info_hw_idle {
	__u32 is_idle;
	/*
	 * Bitmask of busy engines.
	 * Bits definition is according to `enum <chip>_engine_id'.
	 */
	__u32 busy_engines_mask;
};
/*
 * Device status report.
 * NOTE(review): status presumably holds an `enum hl_device_status' value -
 * confirm.
 */
struct hl_info_device_status {
	__u32 status;
	__u32 pad;
};
/*
 * Device utilization report.
 * NOTE(review): utilization is presumably the percentage described for
 * HL_INFO_DEVICE_UTILIZATION - confirm.
 */
struct hl_info_device_utilization {
	__u32 utilization;
	__u32 pad;
};
/* Current and maximum clock rate of the device, both in MHz */
struct hl_info_clk_rate {
	__u32 cur_clk_rate_mhz;
	__u32 max_clk_rate_mhz;
};
/* Counters of hard and soft reset operations performed on the device */
struct hl_info_reset_count {
	__u32 hard_reset_cnt;
	__u32 soft_reset_cnt;
};
/*
 * Device time alongside host time, for synchronization.
 * NOTE(review): the time units are not stated in this header - confirm.
 */
struct hl_info_time_sync {
	__u64 device_time;
	__u64 host_time;
};
/* Argument of the INFO ioctl */
struct hl_info_args {
	/* Location of relevant struct in userspace */
	__u64 return_pointer;
	/*
	 * The size of the return value. Just like "size" in "snprintf",
	 * it limits how many bytes the kernel can write
	 *
	 * For hw_events array, the size should be
	 * hl_info_hw_ip_info.num_of_events * sizeof(__u32)
	 */
	__u32 return_size;

	/* HL_INFO_* */
	__u32 op;

	union {
		/* Context ID - Currently not in use */
		__u32 ctx_id;
		/* Period value for utilization rate (100ms - 1000ms, in 100ms
		 * resolution).
		 */
		__u32 period_ms;
	};

	__u32 pad;
};
/* Opcode to create a new command buffer */
#define HL_CB_OP_CREATE		0
/* Opcode to destroy previously created command buffer */
#define HL_CB_OP_DESTROY	1

/* 2MB minus 32 bytes for 2xMSG_PROT */
#define HL_MAX_CB_SIZE		(0x200000 - 32)
/* Input of the CB ioctl */
struct hl_cb_in {
	/* Handle of CB or 0 if we want to create one */
	__u64 cb_handle;
	/* HL_CB_OP_* */
	__u32 op;
	/* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that
	 * will be allocated, regardless of this parameter's value, is PAGE_SIZE
	 */
	__u32 cb_size;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

/* Output of the CB ioctl */
struct hl_cb_out {
	/* Handle of CB */
	__u64 cb_handle;
};

/* Argument of the CB ioctl; input and output share the same storage */
union hl_cb_args {
	struct hl_cb_in in;
	struct hl_cb_out out;
};
/*
 * This structure size must always be fixed to 64-bytes for backward
 * compatibility
 */
struct hl_cs_chunk {
	union {
		/* For external queue, this represents a Handle of CB on the
		 * Host.
		 * For internal queue in Goya, this represents an SRAM or
		 * a DRAM address of the internal CB. In Gaudi, this might also
		 * represent a mapped host address of the CB.
		 *
		 * A mapped host address is in the device address space, after
		 * a host address was mapped by the device MMU.
		 */
		__u64 cb_handle;

		/* Relevant only when HL_CS_FLAGS_WAIT is set.
		 * This holds address of array of u64 values that contain
		 * signal CS sequence numbers. The wait described by this job
		 * will listen on all those signals (wait event per signal)
		 */
		__u64 signal_seq_arr;
	};

	/* Index of queue to put the CB on */
	__u32 queue_index;

	union {
		/*
		 * Size of command buffer with valid packets
		 * Can be smaller than actual CB size
		 */
		__u32 cb_size;

		/* Relevant only when HL_CS_FLAGS_WAIT is set.
		 * Number of entries in signal_seq_arr
		 */
		__u32 num_signal_seq_arr;
	};

	/* HL_CS_CHUNK_FLAGS_* */
	__u32 cs_chunk_flags;

	/* Align structure to 64 bytes */
	__u32 pad[11];
};
/* SIGNAL and WAIT flags are mutually exclusive */
#define HL_CS_FLAGS_FORCE_RESTORE	0x1
#define HL_CS_FLAGS_SIGNAL		0x2
#define HL_CS_FLAGS_WAIT		0x4

#define HL_CS_STATUS_SUCCESS		0

/* Maximum number of chunks in a single phase array of a CS */
#define HL_MAX_JOBS_PER_CS		512
/*
 * Input of the CS ioctl.
 *
 * NOTE(review): the members add up to 44 bytes and there is no explicit
 * trailing pad, so sizeof() depends on the alignment of __u64 in the target
 * ABI. Do not add a pad without confirming the impact on the ioctl ABI.
 */
struct hl_cs_in {

	/* this holds address of array of hl_cs_chunk for restore phase */
	__u64 chunks_restore;

	/* holds address of array of hl_cs_chunk for execution phase */
	__u64 chunks_execute;

	/* this holds address of array of hl_cs_chunk for store phase -
	 * Currently not in use
	 */
	__u64 chunks_store;

	/* Number of chunks in restore phase array. Maximum number is
	 * HL_MAX_JOBS_PER_CS
	 */
	__u32 num_chunks_restore;

	/* Number of chunks in execution array. Maximum number is
	 * HL_MAX_JOBS_PER_CS
	 */
	__u32 num_chunks_execute;

	/* Number of chunks in store phase array - Currently not in use */
	__u32 num_chunks_store;

	/* HL_CS_FLAGS_* */
	__u32 cs_flags;

	/* Context ID - Currently not in use */
	__u32 ctx_id;
};

/* Output of the CS ioctl */
struct hl_cs_out {
	/*
	 * seq holds the sequence number of the CS to pass to wait ioctl. All
	 * values are valid except for 0 and ULLONG_MAX
	 */
	__u64 seq;
	/* HL_CS_STATUS_* */
	__u32 status;
	__u32 pad;
};

/* Argument of the CS ioctl; input and output share the same storage */
union hl_cs_args {
	struct hl_cs_in in;
	struct hl_cs_out out;
};
/* Input of the "Wait for CS" ioctl */
struct hl_wait_cs_in {
	/* Command submission sequence number */
	__u64 seq;
	/* Absolute timeout to wait in microseconds */
	__u64 timeout_us;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

#define HL_WAIT_CS_STATUS_COMPLETED	0
#define HL_WAIT_CS_STATUS_BUSY		1
#define HL_WAIT_CS_STATUS_TIMEDOUT	2
#define HL_WAIT_CS_STATUS_ABORTED	3
#define HL_WAIT_CS_STATUS_INTERRUPTED	4

/* Output of the "Wait for CS" ioctl */
struct hl_wait_cs_out {
	/* HL_WAIT_CS_STATUS_* */
	__u32 status;
	__u32 pad;
};

/* Argument of the "Wait for CS" ioctl; input and output share storage */
union hl_wait_cs_args {
	struct hl_wait_cs_in in;
	struct hl_wait_cs_out out;
};
/* Opcode to alloc device memory */
#define HL_MEM_OP_ALLOC		0
/* Opcode to free previously allocated device memory */
#define HL_MEM_OP_FREE		1
/* Opcode to map host memory */
#define HL_MEM_OP_MAP		2
/* Opcode to unmap previously mapped host memory */
#define HL_MEM_OP_UNMAP		3

/* Memory flags */
#define HL_MEM_CONTIGUOUS	0x1
#define HL_MEM_SHARED		0x2
#define HL_MEM_USERPTR		0x4
/*
 * Input of the MEMORY ioctl. Which member of the anonymous union is relevant
 * depends on the opcode in "op".
 */
struct hl_mem_in {
	union {
		/* HL_MEM_OP_ALLOC - allocate device memory */
		struct {
			/* Size to alloc */
			__u64 mem_size;
		} alloc;

		/* HL_MEM_OP_FREE - free device memory */
		struct {
			/* Handle returned from HL_MEM_OP_ALLOC */
			__u64 handle;
		} free;

		/* HL_MEM_OP_MAP - map device memory */
		struct {
			/*
			 * Requested virtual address of mapped memory.
			 * The driver will try to map the requested region to
			 * this hint address, as long as the address is valid
			 * and not already mapped. The user should check the
			 * returned address of the IOCTL to make sure he got
			 * the hint address. Passing 0 here means that the
			 * driver will choose the address itself.
			 */
			__u64 hint_addr;
			/* Handle returned from HL_MEM_OP_ALLOC */
			__u64 handle;
		} map_device;

		/* HL_MEM_OP_MAP - map host memory */
		struct {
			/* Address of allocated host memory */
			__u64 host_virt_addr;
			/*
			 * Requested virtual address of mapped memory.
			 * The driver will try to map the requested region to
			 * this hint address, as long as the address is valid
			 * and not already mapped. The user should check the
			 * returned address of the IOCTL to make sure he got
			 * the hint address. Passing 0 here means that the
			 * driver will choose the address itself.
			 */
			__u64 hint_addr;
			/* Size of allocated host memory */
			__u64 mem_size;
		} map_host;

		/* HL_MEM_OP_UNMAP - unmap host memory */
		struct {
			/* Virtual address returned from HL_MEM_OP_MAP */
			__u64 device_virt_addr;
		} unmap;
	};

	/* HL_MEM_OP_* */
	__u32 op;
	/* HL_MEM_* flags */
	__u32 flags;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

/* Output of the MEMORY ioctl */
struct hl_mem_out {
	union {
		/*
		 * Used for HL_MEM_OP_MAP as the virtual address that was
		 * assigned in the device VA space.
		 * A value of 0 means the requested operation failed.
		 */
		__u64 device_virt_addr;

		/*
		 * Used for HL_MEM_OP_ALLOC. This is the assigned
		 * handle for the allocated memory
		 */
		__u64 handle;
	};
};

/* Argument of the MEMORY ioctl; input and output share the same storage */
union hl_mem_args {
	struct hl_mem_in in;
	struct hl_mem_out out;
};
/* Number of entries in hl_debug_params_spmu.event_types */
#define HL_DEBUG_MAX_AUX_VALUES		10
/* Parameters of the ETR debug component (HL_DEBUG_OP_ETR) */
struct hl_debug_params_etr {
	/* Address in memory to allocate buffer */
	__u64 buffer_address;

	/* Size of buffer to allocate */
	__u64 buffer_size;

	/* Sink operation mode: SW fifo, HW fifo, Circular buffer */
	__u32 sink_mode;
	__u32 pad;
};
/* Parameters of the ETF debug component (HL_DEBUG_OP_ETF) */
struct hl_debug_params_etf {
	/* Address in memory to allocate buffer */
	__u64 buffer_address;

	/* Size of buffer to allocate */
	__u64 buffer_size;

	/* Sink operation mode: SW fifo, HW fifo, Circular buffer */
	__u32 sink_mode;
	__u32 pad;
};
/* Parameters of the STM debug component (HL_DEBUG_OP_STM) */
struct hl_debug_params_stm {
	/* Two bit masks for HW event and Stimulus Port */
	__u64 he_mask;
	__u64 sp_mask;

	/* Trace source ID */
	__u32 id;

	/* Frequency for the timestamp register */
	__u32 frequency;
};
/* Parameters of the BMON debug component (HL_DEBUG_OP_BMON) */
struct hl_debug_params_bmon {
	/* Two address ranges that the user can request to filter */
	__u64 start_addr0;
	__u64 addr_mask0;

	__u64 start_addr1;
	__u64 addr_mask1;

	/* Capture window configuration */
	__u32 bw_win;
	__u32 win_capture;

	/* Trace source ID */
	__u32 id;
	__u32 pad;
};
527struct hl_debug_params_spmu {
528 /* Event types selection */
529 __u64 event_types[HL_DEBUG_MAX_AUX_VALUES];
530
531 /* Number of event types selection */
532 __u32 event_types_num;
533 __u32 pad;
534};
535
/* Opcode for ETR component */
#define HL_DEBUG_OP_ETR		0
/* Opcode for ETF component */
#define HL_DEBUG_OP_ETF		1
/* Opcode for STM component */
#define HL_DEBUG_OP_STM		2
/* Opcode for FUNNEL component */
#define HL_DEBUG_OP_FUNNEL	3
/* Opcode for BMON component */
#define HL_DEBUG_OP_BMON	4
/* Opcode for SPMU component */
#define HL_DEBUG_OP_SPMU	5
/* Opcode for timestamp (deprecated) */
#define HL_DEBUG_OP_TIMESTAMP	6
/* Opcode for setting the device into or out of debug mode. The enable
 * variable should be 1 for enabling debug mode and 0 for disabling it
 */
#define HL_DEBUG_OP_SET_MODE	7
/* Argument of the DEBUG ioctl */
struct hl_debug_args {
	/*
	 * Pointer to user input structure.
	 * This field is relevant to specific opcodes.
	 */
	__u64 input_ptr;
	/* Pointer to user output structure */
	__u64 output_ptr;
	/* Size of user input structure */
	__u32 input_size;
	/* Size of user output structure */
	__u32 output_size;
	/* HL_DEBUG_OP_* */
	__u32 op;
	/*
	 * Register index in the component, taken from the debug_regs_index enum
	 * in the various ASIC header files
	 */
	__u32 reg_idx;
	/* Enable/disable */
	__u32 enable;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
};
/*
 * Various information operations such as:
 * - H/W IP information
 * - Current dram usage
 *
 * The user calls this IOCTL with an opcode that describes the required
 * information. The user should supply a pointer to a user-allocated memory
 * chunk, which will be filled by the driver with the requested information.
 *
 * The user supplies the maximum amount of size to copy into the user's memory,
 * in order to prevent data corruption in case of differences between the
 * definitions of structures in kernel and userspace, e.g. in case of old
 * userspace and new kernel driver
 */
#define HL_IOCTL_INFO	\
		_IOWR('H', 0x01, struct hl_info_args)
/*
 * Command Buffer
 * - Request a Command Buffer
 * - Destroy a Command Buffer
 *
 * The command buffers are memory blocks that reside in DMA-able address
 * space and are physically contiguous so they can be accessed by the device
 * directly. They are allocated using the coherent DMA API.
 *
 * When creating a new CB, the IOCTL returns a handle of it, and the user-space
 * process needs to use that handle to mmap the buffer so it can access it.
 *
 */
#define HL_IOCTL_CB		\
		_IOWR('H', 0x02, union hl_cb_args)
/*
 * Command Submission
 *
 * To submit work to the device, the user needs to call this IOCTL with a set
 * of JOBS. That set of JOBS constitutes a CS object.
 * Each JOB will be enqueued on a specific queue, according to the user's input.
 * There can be more than one JOB per queue.
 *
 * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
 * a second set is for "execution" phase and a third set is for "store" phase.
 * The JOBS on the "restore" phase are enqueued only after context-switch
 * (or if it's the first CS for this context). The user can also order the
 * driver to run the "restore" phase explicitly
 *
 * There are two types of queues - external and internal. External queues
 * are DMA queues which transfer data from/to the Host. All other queues are
 * internal. The driver will get completion notifications from the device only
 * on JOBS which are enqueued in the external queues.
 *
 * For jobs on external queues, the user needs to create command buffers
 * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on
 * internal queues, the user needs to prepare a "command buffer" with packets
 * on either the device SRAM/DRAM or the host, and give the device address of
 * that buffer to the CS ioctl.
 *
 * This IOCTL is asynchronous in regard to the actual execution of the CS. This
 * means it returns immediately after ALL the JOBS were enqueued on their
 * relevant queues. Therefore, the user mustn't assume the CS has been completed
 * or has even started to execute.
 *
 * Upon successful enqueue, the IOCTL returns a sequence number which the user
 * can use with the "Wait for CS" IOCTL to check whether the handle's CS
 * external JOBS have been completed. Note that if the CS has internal JOBS
 * which can execute AFTER the external JOBS have finished, the driver might
 * report that the CS has finished executing BEFORE the internal JOBS have
 * actually finished executing.
 *
 * Even though the sequence number increments per CS, the user can NOT
 * automatically assume that if CS with sequence number N finished, then CS
 * with sequence number N-1 also finished. The user can make this assumption if
 * and only if CS N and CS N-1 are exactly the same (same CBs for the same
 * queues).
 */
#define HL_IOCTL_CS			\
		_IOWR('H', 0x03, union hl_cs_args)
/*
 * Wait for Command Submission
 *
 * The user can call this IOCTL with a handle it received from the CS IOCTL
 * to wait until the handle's CS has finished executing. The user will wait
 * inside the kernel until the CS has finished or until the user-requested
 * timeout has expired.
 *
 * The return value of the IOCTL is a standard Linux error code. The possible
 * values are:
 *
 * EINTR     - Kernel waiting has been interrupted, e.g. due to OS signal
 *             that the user process received
 * ETIMEDOUT - The CS has caused a timeout on the device
 * EIO       - The CS was aborted (usually because the device was reset)
 * ENODEV    - The device wants to do hard-reset (so the user needs to close
 *             the FD)
 *
 * The driver also returns a custom define inside the IOCTL which can be:
 *
 * HL_WAIT_CS_STATUS_COMPLETED   - The CS has been completed successfully (0)
 * HL_WAIT_CS_STATUS_BUSY        - The CS is still executing (0)
 * HL_WAIT_CS_STATUS_TIMEDOUT    - The CS has caused a timeout on the device
 *                                 (ETIMEDOUT)
 * HL_WAIT_CS_STATUS_ABORTED     - The CS was aborted, usually because the
 *                                 device was reset (EIO)
 * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR)
 *
 */

#define HL_IOCTL_WAIT_CS			\
		_IOWR('H', 0x04, union hl_wait_cs_args)
/*
 * Memory
 * - Map host memory to device MMU
 * - Unmap host memory from device MMU
 *
 * This IOCTL allows the user to map host memory to the device MMU
 *
 * For host memory, the IOCTL doesn't allocate memory. The user is supposed
 * to allocate the memory in user-space (malloc/new). The driver pins the
 * physical pages (up to the allowed limit by the OS), assigns a virtual
 * address in the device VA space and initializes the device MMU.
 *
 * There is an option for the user to specify the requested virtual address.
 *
 */
#define HL_IOCTL_MEMORY		\
		_IOWR('H', 0x05, union hl_mem_args)
/*
 * Debug
 * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces
 *
 * This IOCTL allows the user to get debug traces from the chip.
 *
 * Before the user can send configuration requests of the various
 * debug/profile engines, it needs to set the device into debug mode.
 * This is because the debug/profile infrastructure is a shared component in
 * the device and we can't allow multiple users to access it at the same time.
 *
 * Once a user sets the device into debug mode, the driver won't allow other
 * users to "work" with the device, i.e. open a FD. If there are multiple users
 * opened on the device, the driver won't allow any user to debug the device.
 *
 * For each configuration request, the user needs to provide the register index
 * and essential data such as buffer address and size.
 *
 * Once the user has finished using the debug/profile engines, he should
 * set the device into non-debug mode, i.e. disable debug mode.
 *
 * The driver can decide to "kick out" the user if he abuses this interface.
 *
 */
#define HL_IOCTL_DEBUG		\
		_IOWR('H', 0x06, struct hl_debug_args)
/*
 * Range of the ioctl command numbers used above.
 * NOTE(review): HL_COMMAND_END (0x07) is one past the last defined command
 * number (0x06), so it looks like an exclusive bound - confirm against the
 * driver.
 */
#define HL_COMMAND_START	0x01
#define HL_COMMAND_END		0x07
#endif /* HABANALABS_H_ */