blob: fc377c618af04ed7a00eed86ef379a8f56dd6c87 [file] [log] [blame]
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "gaudiP.h"
9#include "include/hw_ip/mmu/mmu_general.h"
10#include "include/hw_ip/mmu/mmu_v1_1.h"
11#include "include/gaudi/gaudi_masks.h"
12#include "include/gaudi/gaudi_fw_if.h"
13#include "include/gaudi/gaudi_reg_map.h"
Ofir Bittonebd8d122020-05-10 13:41:28 +030014#include "include/gaudi/gaudi_async_ids_map_extended.h"
Oded Gabbayac0ae6a2020-05-11 10:29:27 +030015
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/genalloc.h>
21#include <linux/io-64-nonatomic-lo-hi.h>
22#include <linux/iommu.h>
23#include <linux/seq_file.h>
24
25/*
26 * Gaudi security scheme:
27 *
28 * 1. Host is protected by:
29 * - Range registers
30 * - MMU
31 *
32 * 2. DDR is protected by:
33 * - Range registers (protect the first 512MB)
34 *
35 * 3. Configuration is protected by:
36 * - Range registers
37 * - Protection bits
38 *
39 * MMU is always enabled.
40 *
41 * QMAN DMA channels 0,1,5 (PCI DMAN):
42 * - DMA is not secured.
43 * - PQ and CQ are secured.
44 * - CP is secured: The driver needs to parse CB but WREG should be allowed
45 * because of TDMA (tensor DMA). Hence, WREG is always not
46 * secured.
47 *
48 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49 * channel 0 to be secured, execute the DMA and change it back to not secured.
50 * Currently, the driver doesn't use the DMA while there are compute jobs
51 * running.
52 *
53 * The current use cases for the driver to use the DMA are:
54 * - Clear SRAM on context switch (happens on context switch when device is
55 * idle)
56 * - MMU page tables area clear (happens on init)
57 *
58 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60 * CQ, CP and the engine are not secured
61 *
62 */
63
64#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
65#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
66#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
67
68#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
69
70#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
71#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
72#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
73#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
74
75#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
76#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
77#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC 14000 /* 14s */
78#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
79#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
80#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
83
84#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
85
86#define GAUDI_MAX_STRING_LEN 20
87
88#define GAUDI_CB_POOL_CB_CNT 512
89#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
90
91#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
92
93#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
94
95#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
96
97#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
98
Oded Gabbay647e8352020-06-07 11:26:48 +030099#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300100
101static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
102 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
103 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
104 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
105 "gaudi cpu eq"
106};
107
108static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
109 [GAUDI_PCI_DMA_1] = 0,
110 [GAUDI_PCI_DMA_2] = 1,
111 [GAUDI_PCI_DMA_3] = 5,
112 [GAUDI_HBM_DMA_1] = 2,
113 [GAUDI_HBM_DMA_2] = 3,
114 [GAUDI_HBM_DMA_3] = 4,
115 [GAUDI_HBM_DMA_4] = 6,
116 [GAUDI_HBM_DMA_5] = 7
117};
118
119static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
120 [0] = GAUDI_QUEUE_ID_DMA_0_0,
121 [1] = GAUDI_QUEUE_ID_DMA_0_1,
122 [2] = GAUDI_QUEUE_ID_DMA_0_2,
123 [3] = GAUDI_QUEUE_ID_DMA_0_3,
124 [4] = GAUDI_QUEUE_ID_DMA_1_0,
125 [5] = GAUDI_QUEUE_ID_DMA_1_1,
126 [6] = GAUDI_QUEUE_ID_DMA_1_2,
127 [7] = GAUDI_QUEUE_ID_DMA_1_3,
128 [8] = GAUDI_QUEUE_ID_DMA_5_0,
129 [9] = GAUDI_QUEUE_ID_DMA_5_1,
130 [10] = GAUDI_QUEUE_ID_DMA_5_2,
131 [11] = GAUDI_QUEUE_ID_DMA_5_3
132};
133
134static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
135 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
136 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
137 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
138 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
139 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
140 [PACKET_REPEAT] = sizeof(struct packet_repeat),
141 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
142 [PACKET_FENCE] = sizeof(struct packet_fence),
143 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
144 [PACKET_NOP] = sizeof(struct packet_nop),
145 [PACKET_STOP] = sizeof(struct packet_stop),
146 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
147 [PACKET_WAIT] = sizeof(struct packet_wait),
148 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
149};
150
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300151static const char * const
152gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
153 "tpc_address_exceed_slm",
154 "tpc_div_by_0",
155 "tpc_spu_mac_overflow",
156 "tpc_spu_addsub_overflow",
157 "tpc_spu_abs_overflow",
158 "tpc_spu_fp_dst_nan_inf",
159 "tpc_spu_fp_dst_denorm",
160 "tpc_vpu_mac_overflow",
161 "tpc_vpu_addsub_overflow",
162 "tpc_vpu_abs_overflow",
163 "tpc_vpu_fp_dst_nan_inf",
164 "tpc_vpu_fp_dst_denorm",
165 "tpc_assertions",
166 "tpc_illegal_instruction",
167 "tpc_pc_wrap_around",
168 "tpc_qm_sw_err",
169 "tpc_hbw_rresp_err",
170 "tpc_hbw_bresp_err",
171 "tpc_lbw_rresp_err",
172 "tpc_lbw_bresp_err"
173};
174
175static const char * const
176gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
177 "PQ AXI HBW error",
178 "CQ AXI HBW error",
179 "CP AXI HBW error",
180 "CP error due to undefined OPCODE",
181 "CP encountered STOP OPCODE",
182 "CP AXI LBW error",
183 "CP WRREG32 or WRBULK returned error",
184 "N/A",
185 "FENCE 0 inc over max value and clipped",
186 "FENCE 1 inc over max value and clipped",
187 "FENCE 2 inc over max value and clipped",
188 "FENCE 3 inc over max value and clipped",
189 "FENCE 0 dec under min value and clipped",
190 "FENCE 1 dec under min value and clipped",
191 "FENCE 2 dec under min value and clipped",
192 "FENCE 3 dec under min value and clipped"
193};
194
195static const char * const
196gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
197 "Choice push while full error",
198 "Choice Q watchdog error",
199 "MSG AXI LBW returned with error"
200};
201
202static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
203 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
204 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
205 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
206 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
207 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
208 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
209 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
210 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
211 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
212 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
213 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
214 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
215 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
216 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
217 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
218 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
219 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
220 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
221 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
222 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
223 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
224 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
225 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
226 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
227 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
228 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
229 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
230 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
231 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
232 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
233 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
234 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
235 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
236 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
237 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
238 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
239 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
276 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
277 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
278 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
279 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
280 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
281 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
282 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
283 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
284 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
285 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
286 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
287 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
288 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
289 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
290 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
291 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
292 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
293 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
294 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
295 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
296 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
297 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
298 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
299 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
300 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
301 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
302 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
303 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
304 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
316};
317
318static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
319 u64 phys_addr);
320static int gaudi_send_job_on_qman0(struct hl_device *hdev,
321 struct hl_cs_job *job);
322static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
323 u32 size, u64 val);
324static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
325 u32 tpc_id);
326static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
327static int gaudi_armcp_info_get(struct hl_device *hdev);
328static void gaudi_disable_clock_gating(struct hl_device *hdev);
329static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
330
331static int gaudi_get_fixed_properties(struct hl_device *hdev)
332{
333 struct asic_fixed_properties *prop = &hdev->asic_prop;
334 int i;
335
336 if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
337 dev_err(hdev->dev,
338 "Number of H/W queues must be smaller than %d\n",
339 HL_MAX_QUEUES);
340 return -EFAULT;
341 }
342
343 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
344 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
345 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
346 prop->hw_queues_props[i].driver_only = 0;
347 prop->hw_queues_props[i].requires_kernel_cb = 1;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300348 prop->hw_queues_props[i].supports_sync_stream = 1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300349 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
350 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
351 prop->hw_queues_props[i].driver_only = 1;
352 prop->hw_queues_props[i].requires_kernel_cb = 0;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300353 prop->hw_queues_props[i].supports_sync_stream = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300354 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
355 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
356 prop->hw_queues_props[i].driver_only = 0;
357 prop->hw_queues_props[i].requires_kernel_cb = 0;
358 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
359 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
360 prop->hw_queues_props[i].driver_only = 0;
361 prop->hw_queues_props[i].requires_kernel_cb = 0;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300362 prop->hw_queues_props[i].supports_sync_stream = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300363 }
364 }
365
366 for (; i < HL_MAX_QUEUES; i++)
367 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
368
369 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300370 prop->sync_stream_first_sob = 0;
371 prop->sync_stream_first_mon = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300372 prop->dram_base_address = DRAM_PHYS_BASE;
373 prop->dram_size = GAUDI_HBM_SIZE_32GB;
374 prop->dram_end_address = prop->dram_base_address +
375 prop->dram_size;
376 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
377
378 prop->sram_base_address = SRAM_BASE_ADDR;
379 prop->sram_size = SRAM_SIZE;
380 prop->sram_end_address = prop->sram_base_address +
381 prop->sram_size;
382 prop->sram_user_base_address = prop->sram_base_address +
383 SRAM_USER_BASE_OFFSET;
384
385 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
386 if (hdev->pldm)
387 prop->mmu_pgt_size = 0x800000; /* 8MB */
388 else
389 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
390 prop->mmu_pte_size = HL_PTE_SIZE;
391 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
392 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
393 prop->dram_page_size = PAGE_SIZE_2MB;
394
395 prop->pmmu.hop0_shift = HOP0_SHIFT;
396 prop->pmmu.hop1_shift = HOP1_SHIFT;
397 prop->pmmu.hop2_shift = HOP2_SHIFT;
398 prop->pmmu.hop3_shift = HOP3_SHIFT;
399 prop->pmmu.hop4_shift = HOP4_SHIFT;
400 prop->pmmu.hop0_mask = HOP0_MASK;
401 prop->pmmu.hop1_mask = HOP1_MASK;
402 prop->pmmu.hop2_mask = HOP2_MASK;
403 prop->pmmu.hop3_mask = HOP3_MASK;
404 prop->pmmu.hop4_mask = HOP4_MASK;
405 prop->pmmu.start_addr = VA_HOST_SPACE_START;
406 prop->pmmu.end_addr =
407 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
408 prop->pmmu.page_size = PAGE_SIZE_4KB;
409
410 /* PMMU and HPMMU are the same except of page size */
411 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
412 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
413
414 /* shifts and masks are the same in PMMU and DMMU */
415 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
416 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
417 prop->dmmu.end_addr = VA_HOST_SPACE_END;
418 prop->dmmu.page_size = PAGE_SIZE_2MB;
419
420 prop->cfg_size = CFG_SIZE;
421 prop->max_asid = MAX_ASID;
422 prop->num_of_events = GAUDI_EVENT_SIZE;
423 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
424
425 prop->max_power_default = MAX_POWER_DEFAULT;
426
427 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
428 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
429
430 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
431 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
432
433 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
434 CARD_NAME_MAX_LEN);
435
Ofir Bittonc16d45f2020-06-02 12:28:27 +0300436 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
437
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300438 return 0;
439}
440
441static int gaudi_pci_bars_map(struct hl_device *hdev)
442{
443 static const char * const name[] = {"SRAM", "CFG", "HBM"};
444 bool is_wc[3] = {false, false, true};
445 int rc;
446
447 rc = hl_pci_bars_map(hdev, name, is_wc);
448 if (rc)
449 return rc;
450
451 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
452 (CFG_BASE - SPI_FLASH_BASE_ADDR);
453
454 return 0;
455}
456
457static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
458{
459 struct gaudi_device *gaudi = hdev->asic_specific;
460 u64 old_addr = addr;
461 int rc;
462
463 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
464 return old_addr;
465
466 /* Inbound Region 2 - Bar 4 - Point to HBM */
467 rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
468 if (rc)
469 return U64_MAX;
470
471 if (gaudi) {
472 old_addr = gaudi->hbm_bar_cur_addr;
473 gaudi->hbm_bar_cur_addr = addr;
474 }
475
476 return old_addr;
477}
478
479static int gaudi_init_iatu(struct hl_device *hdev)
480{
481 int rc = 0;
482
483 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
484 rc = hl_pci_iatu_write(hdev, 0x314,
485 lower_32_bits(SPI_FLASH_BASE_ADDR));
486 rc |= hl_pci_iatu_write(hdev, 0x318,
487 upper_32_bits(SPI_FLASH_BASE_ADDR));
488 rc |= hl_pci_iatu_write(hdev, 0x300, 0);
489 /* Enable + Bar match + match enable */
490 rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
491
492 if (rc)
493 return -EIO;
494
495 return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
496 HOST_PHYS_BASE, HOST_PHYS_SIZE);
497}
498
499static int gaudi_early_init(struct hl_device *hdev)
500{
501 struct asic_fixed_properties *prop = &hdev->asic_prop;
502 struct pci_dev *pdev = hdev->pdev;
503 int rc;
504
505 rc = gaudi_get_fixed_properties(hdev);
506 if (rc) {
507 dev_err(hdev->dev, "Failed to get fixed properties\n");
508 return rc;
509 }
510
511 /* Check BAR sizes */
512 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
513 dev_err(hdev->dev,
514 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
515 SRAM_BAR_ID,
516 (unsigned long long) pci_resource_len(pdev,
517 SRAM_BAR_ID),
518 SRAM_BAR_SIZE);
519 return -ENODEV;
520 }
521
522 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
523 dev_err(hdev->dev,
524 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
525 CFG_BAR_ID,
526 (unsigned long long) pci_resource_len(pdev,
527 CFG_BAR_ID),
528 CFG_BAR_SIZE);
529 return -ENODEV;
530 }
531
532 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
533
534 rc = hl_pci_init(hdev);
535 if (rc)
536 return rc;
537
538 return 0;
539}
540
541static int gaudi_early_fini(struct hl_device *hdev)
542{
543 hl_pci_fini(hdev);
544
545 return 0;
546}
547
548/**
549 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
550 *
551 * @hdev: pointer to hl_device structure
552 *
553 */
554static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
555{
556 struct asic_fixed_properties *prop = &hdev->asic_prop;
557
558 prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
559 prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
560 prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
561 prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
562}
563
564static int _gaudi_init_tpc_mem(struct hl_device *hdev,
565 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
566{
567 struct asic_fixed_properties *prop = &hdev->asic_prop;
568 struct packet_lin_dma *init_tpc_mem_pkt;
569 struct hl_cs_job *job;
570 struct hl_cb *cb;
571 u64 dst_addr;
572 u32 cb_size, ctl;
573 u8 tpc_id;
574 int rc;
575
576 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
577 if (!cb)
578 return -EFAULT;
579
580 init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
581 cb->kernel_address;
582 cb_size = sizeof(*init_tpc_mem_pkt);
583 memset(init_tpc_mem_pkt, 0, cb_size);
584
585 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
586
587 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
588 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
589 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
590 (1 << GAUDI_PKT_CTL_MB_SHIFT));
591
592 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
593
594 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
595 dst_addr = (prop->sram_user_base_address &
596 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
597 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
598 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
599
600 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
601 if (!job) {
602 dev_err(hdev->dev, "Failed to allocate a new job\n");
603 rc = -ENOMEM;
604 goto release_cb;
605 }
606
607 job->id = 0;
608 job->user_cb = cb;
609 job->user_cb->cs_cnt++;
610 job->user_cb_size = cb_size;
611 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
612 job->patched_cb = job->user_cb;
613 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
614
615 hl_debugfs_add_job(hdev, job);
616
617 rc = gaudi_send_job_on_qman0(hdev, job);
618
619 if (rc)
620 goto free_job;
621
622 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
623 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
624 if (rc)
625 break;
626 }
627
628free_job:
629 hl_userptr_delete_list(hdev, &job->userptr_list);
630 hl_debugfs_remove_job(hdev, job);
631 kfree(job);
632 cb->cs_cnt--;
633
634release_cb:
635 hl_cb_put(cb);
636 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
637
638 return rc;
639}
640
641/*
642 * gaudi_init_tpc_mem() - Initialize TPC memories.
643 * @hdev: Pointer to hl_device structure.
644 *
645 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
646 *
647 * Return: 0 for success, negative value for error.
648 */
649static int gaudi_init_tpc_mem(struct hl_device *hdev)
650{
651 const struct firmware *fw;
652 size_t fw_size;
653 void *cpu_addr;
654 dma_addr_t dma_handle;
655 int rc;
656
657 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
658 if (rc) {
659 dev_err(hdev->dev, "Firmware file %s is not found!\n",
660 GAUDI_TPC_FW_FILE);
661 goto out;
662 }
663
664 fw_size = fw->size;
665 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
666 &dma_handle, GFP_KERNEL | __GFP_ZERO);
667 if (!cpu_addr) {
668 dev_err(hdev->dev,
669 "Failed to allocate %zu of dma memory for TPC kernel\n",
670 fw_size);
671 rc = -ENOMEM;
672 goto out;
673 }
674
675 memcpy(cpu_addr, fw->data, fw_size);
676
677 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
678
679 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
680 dma_handle);
681
682out:
683 release_firmware(fw);
684 return rc;
685}
686
687static int gaudi_late_init(struct hl_device *hdev)
688{
689 struct gaudi_device *gaudi = hdev->asic_specific;
690 int rc;
691
692 rc = gaudi->armcp_info_get(hdev);
693 if (rc) {
694 dev_err(hdev->dev, "Failed to get armcp info\n");
695 return rc;
696 }
697
698 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
699 if (rc) {
700 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
701 return rc;
702 }
703
704 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
705
706 gaudi_fetch_psoc_frequency(hdev);
707
708 rc = gaudi_mmu_clear_pgt_range(hdev);
709 if (rc) {
710 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
711 goto disable_pci_access;
712 }
713
714 rc = gaudi_init_tpc_mem(hdev);
715 if (rc) {
716 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
717 goto disable_pci_access;
718 }
719
720 return 0;
721
722disable_pci_access:
723 hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
724
725 return rc;
726}
727
728static void gaudi_late_fini(struct hl_device *hdev)
729{
730 const struct hwmon_channel_info **channel_info_arr;
731 int i = 0;
732
733 if (!hdev->hl_chip_info->info)
734 return;
735
736 channel_info_arr = hdev->hl_chip_info->info;
737
738 while (channel_info_arr[i]) {
739 kfree(channel_info_arr[i]->config);
740 kfree(channel_info_arr[i]);
741 i++;
742 }
743
744 kfree(channel_info_arr);
745
746 hdev->hl_chip_info->info = NULL;
747}
748
749static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
750{
751 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
752 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
753 int i, j, rc = 0;
754
755 /*
756 * The device CPU works with 40-bits addresses, while bit 39 must be set
757 * to '1' when accessing the host.
758 * Bits 49:39 of the full host address are saved for a later
759 * configuration of the HW to perform extension to 50 bits.
760 * Because there is a single HW register that holds the extension bits,
761 * these bits must be identical in all allocated range.
762 */
763
764 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
765 virt_addr_arr[i] =
766 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
767 HL_CPU_ACCESSIBLE_MEM_SIZE,
768 &dma_addr_arr[i],
769 GFP_KERNEL | __GFP_ZERO);
770 if (!virt_addr_arr[i]) {
771 rc = -ENOMEM;
772 goto free_dma_mem_arr;
773 }
774
775 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
776 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
777 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
778 break;
779 }
780
781 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
782 dev_err(hdev->dev,
783 "MSB of CPU accessible DMA memory are not identical in all range\n");
784 rc = -EFAULT;
785 goto free_dma_mem_arr;
786 }
787
788 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
789 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
790 hdev->cpu_pci_msb_addr =
791 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
792
793 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
794
795free_dma_mem_arr:
796 for (j = 0 ; j < i ; j++)
797 hdev->asic_funcs->asic_dma_free_coherent(hdev,
798 HL_CPU_ACCESSIBLE_MEM_SIZE,
799 virt_addr_arr[j],
800 dma_addr_arr[j]);
801
802 return rc;
803}
804
805static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
806{
807 struct gaudi_device *gaudi = hdev->asic_specific;
808 struct gaudi_internal_qman_info *q;
809 u32 i;
810
811 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
812 q = &gaudi->internal_qmans[i];
813 if (!q->pq_kernel_addr)
814 continue;
815 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
816 q->pq_kernel_addr,
817 q->pq_dma_addr);
818 }
819}
820
821static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
822{
823 struct gaudi_device *gaudi = hdev->asic_specific;
824 struct gaudi_internal_qman_info *q;
825 int rc, i;
826
827 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
828 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
829 continue;
830
831 q = &gaudi->internal_qmans[i];
832
833 switch (i) {
834 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
835 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
836 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
837 break;
838 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
839 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
840 break;
841 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
842 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
843 break;
844 default:
845 dev_err(hdev->dev, "Bad internal queue index %d", i);
846 rc = -EINVAL;
847 goto free_internal_qmans_pq_mem;
848 }
849
850 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
851 hdev, q->pq_size,
852 &q->pq_dma_addr,
853 GFP_KERNEL | __GFP_ZERO);
854 if (!q->pq_kernel_addr) {
855 rc = -ENOMEM;
856 goto free_internal_qmans_pq_mem;
857 }
858 }
859
860 return 0;
861
862free_internal_qmans_pq_mem:
863 gaudi_free_internal_qmans_pq_mem(hdev);
864 return rc;
865}
866
867static int gaudi_sw_init(struct hl_device *hdev)
868{
869 struct gaudi_device *gaudi;
Ofir Bittonebd8d122020-05-10 13:41:28 +0300870 u32 i, event_id = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300871 int rc;
872
873 /* Allocate device structure */
874 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
875 if (!gaudi)
876 return -ENOMEM;
877
Ofir Bittonebd8d122020-05-10 13:41:28 +0300878 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
879 if (gaudi_irq_map_table[i].valid) {
880 if (event_id == GAUDI_EVENT_SIZE) {
881 dev_err(hdev->dev,
882 "Event array exceeds the limit of %u events\n",
883 GAUDI_EVENT_SIZE);
884 rc = -EINVAL;
885 goto free_gaudi_device;
886 }
887
888 gaudi->events[event_id++] =
889 gaudi_irq_map_table[i].fc_id;
890 }
891 }
892
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300893 gaudi->armcp_info_get = gaudi_armcp_info_get;
894
895 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
896
897 hdev->asic_specific = gaudi;
898
899 /* Create DMA pool for small allocations */
900 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
901 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
902 if (!hdev->dma_pool) {
903 dev_err(hdev->dev, "failed to create DMA pool\n");
904 rc = -ENOMEM;
905 goto free_gaudi_device;
906 }
907
908 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
909 if (rc)
910 goto free_dma_pool;
911
912 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
913 if (!hdev->cpu_accessible_dma_pool) {
914 dev_err(hdev->dev,
915 "Failed to create CPU accessible DMA pool\n");
916 rc = -ENOMEM;
917 goto free_cpu_dma_mem;
918 }
919
920 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
921 (uintptr_t) hdev->cpu_accessible_dma_mem,
922 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
923 if (rc) {
924 dev_err(hdev->dev,
925 "Failed to add memory to CPU accessible DMA pool\n");
926 rc = -EFAULT;
927 goto free_cpu_accessible_dma_pool;
928 }
929
930 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
931 if (rc)
932 goto free_cpu_accessible_dma_pool;
933
934 spin_lock_init(&gaudi->hw_queues_lock);
935 mutex_init(&gaudi->clk_gate_mutex);
936
937 hdev->supports_sync_stream = true;
938 hdev->supports_coresight = true;
939
940 return 0;
941
942free_cpu_accessible_dma_pool:
943 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
944free_cpu_dma_mem:
945 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
946 hdev->cpu_pci_msb_addr);
947 hdev->asic_funcs->asic_dma_free_coherent(hdev,
948 HL_CPU_ACCESSIBLE_MEM_SIZE,
949 hdev->cpu_accessible_dma_mem,
950 hdev->cpu_accessible_dma_address);
951free_dma_pool:
952 dma_pool_destroy(hdev->dma_pool);
953free_gaudi_device:
954 kfree(gaudi);
955 return rc;
956}
957
958static int gaudi_sw_fini(struct hl_device *hdev)
959{
960 struct gaudi_device *gaudi = hdev->asic_specific;
961
962 gaudi_free_internal_qmans_pq_mem(hdev);
963
964 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
965
966 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
967 hdev->cpu_pci_msb_addr);
968 hdev->asic_funcs->asic_dma_free_coherent(hdev,
969 HL_CPU_ACCESSIBLE_MEM_SIZE,
970 hdev->cpu_accessible_dma_mem,
971 hdev->cpu_accessible_dma_address);
972
973 dma_pool_destroy(hdev->dma_pool);
974
975 mutex_destroy(&gaudi->clk_gate_mutex);
976
977 kfree(gaudi);
978
979 return 0;
980}
981
982static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
983{
984 struct hl_device *hdev = arg;
985 int i;
986
987 if (hdev->disabled)
988 return IRQ_HANDLED;
989
990 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
991 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
992
993 hl_irq_handler_eq(irq, &hdev->event_queue);
994
995 return IRQ_HANDLED;
996}
997
998/*
999 * For backward compatibility, new MSI interrupts should be set after the
1000 * existing CPU and NIC interrupts.
1001 */
1002static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1003 bool cpu_eq)
1004{
1005 int msi_vec;
1006
1007 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1008 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1009 GAUDI_EVENT_QUEUE_MSI_IDX);
1010
1011 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1012 (nr + NIC_NUMBER_OF_ENGINES + 1);
1013
1014 return pci_irq_vector(hdev->pdev, msi_vec);
1015}
1016
1017static int gaudi_enable_msi_single(struct hl_device *hdev)
1018{
1019 int rc, irq;
1020
1021 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1022
1023 irq = gaudi_pci_irq_vector(hdev, 0, false);
1024 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1025 "gaudi single msi", hdev);
1026 if (rc)
1027 dev_err(hdev->dev,
1028 "Failed to request single MSI IRQ\n");
1029
1030 return rc;
1031}
1032
1033static int gaudi_enable_msi_multi(struct hl_device *hdev)
1034{
1035 int cq_cnt = hdev->asic_prop.completion_queues_count;
1036 int rc, i, irq_cnt_init, irq;
1037
1038 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1039 irq = gaudi_pci_irq_vector(hdev, i, false);
1040 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1041 &hdev->completion_queue[i]);
1042 if (rc) {
1043 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1044 goto free_irqs;
1045 }
1046 }
1047
1048 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1049 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1050 &hdev->event_queue);
1051 if (rc) {
1052 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1053 goto free_irqs;
1054 }
1055
1056 return 0;
1057
1058free_irqs:
1059 for (i = 0 ; i < irq_cnt_init ; i++)
1060 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1061 &hdev->completion_queue[i]);
1062 return rc;
1063}
1064
1065static int gaudi_enable_msi(struct hl_device *hdev)
1066{
1067 struct gaudi_device *gaudi = hdev->asic_specific;
1068 int rc;
1069
1070 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1071 return 0;
1072
1073 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1074 PCI_IRQ_MSI);
1075 if (rc < 0) {
1076 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1077 return rc;
1078 }
1079
1080 if (rc < NUMBER_OF_INTERRUPTS) {
1081 gaudi->multi_msi_mode = false;
1082 rc = gaudi_enable_msi_single(hdev);
1083 } else {
1084 gaudi->multi_msi_mode = true;
1085 rc = gaudi_enable_msi_multi(hdev);
1086 }
1087
1088 if (rc)
1089 goto free_pci_irq_vectors;
1090
1091 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1092
1093 return 0;
1094
1095free_pci_irq_vectors:
1096 pci_free_irq_vectors(hdev->pdev);
1097 return rc;
1098}
1099
1100static void gaudi_sync_irqs(struct hl_device *hdev)
1101{
1102 struct gaudi_device *gaudi = hdev->asic_specific;
1103 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1104
1105 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1106 return;
1107
1108 /* Wait for all pending IRQs to be finished */
1109 if (gaudi->multi_msi_mode) {
1110 for (i = 0 ; i < cq_cnt ; i++)
1111 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1112
1113 synchronize_irq(gaudi_pci_irq_vector(hdev,
1114 GAUDI_EVENT_QUEUE_MSI_IDX,
1115 true));
1116 } else {
1117 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1118 }
1119}
1120
1121static void gaudi_disable_msi(struct hl_device *hdev)
1122{
1123 struct gaudi_device *gaudi = hdev->asic_specific;
1124 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1125
1126 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1127 return;
1128
1129 gaudi_sync_irqs(hdev);
1130
1131 if (gaudi->multi_msi_mode) {
1132 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1133 true);
1134 free_irq(irq, &hdev->event_queue);
1135
1136 for (i = 0 ; i < cq_cnt ; i++) {
1137 irq = gaudi_pci_irq_vector(hdev, i, false);
1138 free_irq(irq, &hdev->completion_queue[i]);
1139 }
1140 } else {
1141 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1142 }
1143
1144 pci_free_irq_vectors(hdev->pdev);
1145
1146 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1147}
1148
1149static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1150{
1151 struct gaudi_device *gaudi = hdev->asic_specific;
1152
1153 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1154 return;
1155
1156 if (!hdev->sram_scrambler_enable)
1157 return;
1158
1159 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1161 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1162 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1163 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1164 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1165 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1166 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1167 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1168 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1169 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1170 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1171 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1172 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1173 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1174 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1175
1176 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1177 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1178 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1179 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1180 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1181 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1182 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1183 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1184 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1185 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1186 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1187 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1188 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1189 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1190 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1191 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1192
1193 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1194 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1195 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1196 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1197 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1198 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1199 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1200 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1201 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1202 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1203 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1204 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1205 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1206 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1207 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1208 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1209
1210 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1211}
1212
1213static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1214{
1215 struct gaudi_device *gaudi = hdev->asic_specific;
1216
1217 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1218 return;
1219
1220 if (!hdev->dram_scrambler_enable)
1221 return;
1222
1223 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1224 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1225 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1226 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1227 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1228 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1229 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1230 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1231 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1232 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1233 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1234 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1235 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1236 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1237 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1238 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1239
1240 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1241 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1242 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1243 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1244 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1245 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1246 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1247 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1248 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1249 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1250 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1251 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1252 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1253 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1254 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1255 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1256
1257 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1258 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1259 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1260 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1261 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1262 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1263 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1264 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1265 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1266 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1267 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1268 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1269 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1270 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1271 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1272 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1273
1274 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1275}
1276
1277static void gaudi_init_e2e(struct hl_device *hdev)
1278{
1279 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1280 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1281 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1282 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1283
1284 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1285 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1286 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1287 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1288
1289 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1290 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1291 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1292 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1293
1294 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1295 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1296 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1297 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1298
1299 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1300 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1301 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1302 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1303
1304 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1305 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1306 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1307 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1308
1309 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1310 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1311 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1312 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1313
1314 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1315 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1316 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1317 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1318
1319 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1320 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1321 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1322 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1323
1324 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1325 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1326 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1327 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1328
1329 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1330 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1331 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1332 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1333
1334 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1335 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1336 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1337 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1338
1339 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1340 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1341 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1342 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1343
1344 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1345 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1346 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1347 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1348
1349 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1350 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1351 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1352 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1353
1354 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1355 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1356 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1357 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1358
1359 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1360 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1361 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1362 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1363
1364 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1365 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1366 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1367 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1368
1369 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1370 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1371 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1372 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1373
1374 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1375 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1376 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1377 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1378
1379 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1380 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1381 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1382 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1383
1384 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1385 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1386 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1387 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1388
1389 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1390 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1391 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1392 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1393
1394 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1395 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1396 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1397 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1398
1399 if (!hdev->dram_scrambler_enable) {
1400 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1401 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1402 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1403 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1404
1405 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1406 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1407 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1408 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1409
1410 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1411 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1412 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1413 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1414
1415 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1416 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1417 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1418 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1419
1420 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1421 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1422 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1423 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1424
1425 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1426 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1427 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1428 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1429
1430 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1431 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1432 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1433 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1434
1435 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1436 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1437 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1438 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1439
1440 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1441 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1442 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1443 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1444
1445 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1446 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1447 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1448 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1449
1450 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1451 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1452 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1453 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1454
1455 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1456 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1457 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1458 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1459
1460 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1461 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1462 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1463 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1464
1465 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1466 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1467 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1468 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1469
1470 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1471 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1472 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1473 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1474
1475 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1476 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1477 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1478 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1479
1480 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1481 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1482 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1483 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1484
1485 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1486 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1487 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1488 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1489
1490 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1491 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1492 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1493 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1494
1495 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1496 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1497 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1498 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1499
1500 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1501 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1502 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1503 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1504
1505 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1506 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1507 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1508 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1509
1510 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1511 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1512 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1513 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1514
1515 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1516 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1517 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1518 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1519 }
1520
1521 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1522 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1523 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1524 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1525
1526 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1527 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1528 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1529 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1530
1531 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1532 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1533 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1534 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1535
1536 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1537 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1538 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1539 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1540
1541 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1542 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1543 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1544 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1545
1546 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1547 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1548 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1549 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1550
1551 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1552 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1553 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1554 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1555
1556 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1557 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1558 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1559 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1560
1561 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1562 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1563 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1564 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1565
1566 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1567 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1568 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1569 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1570
1571 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1572 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1573 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1574 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1575
1576 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1577 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1578 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1579 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1580
1581 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1582 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1583 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1584 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1585
1586 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1587 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1588 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1589 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1590
1591 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1592 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1593 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1594 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1595
1596 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1597 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1598 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1599 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1600
1601 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1602 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1603 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1604 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1605
1606 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1607 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1608 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1609 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1610
1611 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1612 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1613 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1614 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1615
1616 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1617 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1618 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1619 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1620
1621 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1622 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1623 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1624 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1625
1626 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1627 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1628 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1629 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1630
1631 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1632 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1633 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1634 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1635
1636 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1637 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1638 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1639 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1640}
1641
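/*
 * Set the HBM read/write credit counters of all DMA_IF interfaces and
 * enable the read/write credits on both HBM channels of each interface.
 */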
1642static void gaudi_init_hbm_cred(struct hl_device *hdev)
1643{
1644 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1645
1646 hbm0_wr = 0x33333333;
1647 hbm0_rd = 0x77777777;
1648 hbm1_wr = 0x55555555;
1649 hbm1_rd = 0xDDDDDDDD;
1650
1651 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1652 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1653 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1654 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1655
1656 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1657 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1658 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1659 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1660
1661 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1662 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1663 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1664 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1665
1666 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1667 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1668 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1669 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1670
1671 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1672 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1673 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1674 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1675 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1676 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1677 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1678 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1679 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1680 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1681 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1682 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1683
1684 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1685 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1686 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1687 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1688 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1689 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1690 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1691 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1692 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1693 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1694 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1695 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1696}
1697
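/*
 * One-time "golden" register configuration: E2E and HBM credits, TPC
 * interrupt masking and i-cache setup, clearing of the first SRAM bytes
 * used by Tensor DMA, and MME rollup counters.
 */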
1698static void gaudi_init_golden_registers(struct hl_device *hdev)
1699{
1700 u32 tpc_offset;
1701 int tpc_id, i;
1702
1703 gaudi_init_e2e(hdev);
1704
1705 gaudi_init_hbm_cred(hdev);
1706
1707 gaudi_disable_clock_gating(hdev);
1708
1709 for (tpc_id = 0, tpc_offset = 0;
1710 tpc_id < TPC_NUMBER_OF_ENGINES;
1711 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1712 /* Mask all arithmetic interrupts from TPC */
1713 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1714 /* Set 16 cache lines */
1715 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1716 ICACHE_FETCH_LINE_NUM, 2);
1717 }
1718
1719 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1720 for (i = 0 ; i < 128 ; i += 8)
1721 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1722
1723 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1724 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1725 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1726 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1727}
1728
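/*
 * Configure a single stream of a PCI DMA QMAN: PQ base and size on host
 * memory, CP message base addresses for the sync manager, and, for
 * stream 0 only, the RAZWI/arbitration error reporting of the whole QMAN.
 */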
1729static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1730 int qman_id, dma_addr_t qman_pq_addr)
1731{
1732 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1733 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1734 u32 q_off, dma_qm_offset;
1735 u32 dma_qm_err_cfg;
1736
1737 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1738
1739 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1740 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1741 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1743 so_base_en_lo = lower_32_bits(CFG_BASE +
1744 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1745 so_base_en_hi = upper_32_bits(CFG_BASE +
1746 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1747 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1748 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1749 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1750 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1751 so_base_ws_lo = lower_32_bits(CFG_BASE +
1752 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1753 so_base_ws_hi = upper_32_bits(CFG_BASE +
1754 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1755
1756 q_off = dma_qm_offset + qman_id * 4;
1757
1758 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1759 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1760
1761 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1762 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1763 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1764
1765 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1766 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1767 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1768
1769 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1770 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1771 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1772 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1773 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1774 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1775 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1776 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1777
1778 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1779
1780 /* The following configuration is needed only once per QMAN */
1781 if (qman_id == 0) {
1782 /* Configure RAZWI IRQ */
1783 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1784 if (hdev->stop_on_err) {
1785 dma_qm_err_cfg |=
1786 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1787 }
1788
1789 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1790 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1791 lower_32_bits(CFG_BASE +
1792 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1793 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1794 upper_32_bits(CFG_BASE +
1795 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1796 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1797 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1798 dma_id);
1799
1800 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1801 QM_ARB_ERR_MSG_EN_MASK);
1802
1803 /* Increase ARB WDT to support streams architecture */
1804 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1805 GAUDI_ARB_WDT_TIMEOUT);
1806
1807 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1808 QMAN_EXTERNAL_MAKE_TRUSTED);
1809
1810 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1811 }
1812}
1813
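/*
 * Configure and enable a DMA core engine: maximum outstanding reads,
 * error reporting towards the GIC, protection bits and MMU bypass for
 * the secured channels.
 */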
1814static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1815{
1816 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1817 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1818
1819 /* Set to maximum possible according to physical size */
1820 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1821 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1822
1823 /* The STOP_ON bit means the operation is not completed in case of RAZWI */
1824 if (hdev->stop_on_err)
1825 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1826
1827 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1828 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1829 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1830 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1831 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1832 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1833 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1834 WREG32(mmDMA0_CORE_PROT + dma_offset,
1835 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1836 /* If the channel is secured, it should be in MMU bypass mode */
1837 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1838 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1839 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1840}
1841
1842static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1843 u32 enable_mask)
1844{
1845 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1846
1847 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1848}
1849
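/*
 * Initialize all PCI DMA QMANs: assign a CQ and MSI vector to each
 * external queue stream, then enable the DMA core and QMAN of every
 * channel.
 */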
1850static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1851{
1852 struct gaudi_device *gaudi = hdev->asic_specific;
1853 struct hl_hw_queue *q;
1854 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1855
1856 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1857 return;
1858
1859 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1860 dma_id = gaudi_dma_assignment[i];
1861 /*
1862 * For queues after the CPU Q, add 1 to get the correct
1863 * queue index. In addition, the CPU EQ and NIC IRQs must be
1864 * added in order to get the correct MSI register.
1865 */
1866 if (dma_id > 1) {
1867 cpu_skip = 1;
1868 nic_skip = NIC_NUMBER_OF_ENGINES;
1869 } else {
1870 cpu_skip = 0;
1871 nic_skip = 0;
1872 }
1873
1874 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1875 q_idx = 4 * dma_id + j + cpu_skip;
1876 q = &hdev->kernel_queues[q_idx];
1877 q->cq_id = cq_id++;
1878 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1879 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1880 q->bus_address);
1881 }
1882
1883 gaudi_init_dma_core(hdev, dma_id);
1884
1885 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1886 }
1887
1888 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1889}
1890
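/*
 * Configure a single stream of an HBM DMA QMAN. Streams 0-3 set up their
 * PQ, while qman_id 4 configures the lower CP together with the QMAN's
 * error reporting and arbitration settings.
 */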
1891static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1892 int qman_id, u64 qman_base_addr)
1893{
1894 u32 mtr_base_lo, mtr_base_hi;
1895 u32 so_base_lo, so_base_hi;
1896 u32 q_off, dma_qm_offset;
1897 u32 dma_qm_err_cfg;
1898
1899 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1900
1901 mtr_base_lo = lower_32_bits(CFG_BASE +
1902 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1903 mtr_base_hi = upper_32_bits(CFG_BASE +
1904 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1905 so_base_lo = lower_32_bits(CFG_BASE +
1906 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1907 so_base_hi = upper_32_bits(CFG_BASE +
1908 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1909
1910 q_off = dma_qm_offset + qman_id * 4;
1911
1912 if (qman_id < 4) {
1913 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
1914 lower_32_bits(qman_base_addr));
1915 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
1916 upper_32_bits(qman_base_addr));
1917
1918 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
1919 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1920 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1921
1922 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
1923 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
1924 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1925 } else {
1926 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1927 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1928 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1929
1930 /* Configure RAZWI IRQ */
1931 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1932 if (hdev->stop_on_err) {
1933 dma_qm_err_cfg |=
1934 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1935 }
1936 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1937
1938 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1939 lower_32_bits(CFG_BASE +
1940 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1941 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1942 upper_32_bits(CFG_BASE +
1943 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1944 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1945 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1946 dma_id);
1947
1948 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1949 QM_ARB_ERR_MSG_EN_MASK);
1950
1951 /* Increase ARB WDT to support streams architecture */
1952 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1953 GAUDI_ARB_WDT_TIMEOUT);
1954
1955 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1956 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1957 QMAN_INTERNAL_MAKE_TRUSTED);
1958 }
1959
1960 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
1961 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
1962 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
1963 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
1964}
1965
1966static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
1967{
1968 struct gaudi_device *gaudi = hdev->asic_specific;
1969 struct gaudi_internal_qman_info *q;
1970 u64 qman_base_addr;
1971 int i, j, dma_id, internal_q_index;
1972
1973 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
1974 return;
1975
1976 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
1977 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
1978
1979 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1980 /*
1981 * Add the CPU queue in order to get the correct queue
1982 * number, as all internal queues are placed after it
1983 */
1984 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
1985
1986 q = &gaudi->internal_qmans[internal_q_index];
1987 qman_base_addr = (u64) q->pq_dma_addr;
1988 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
1989 qman_base_addr);
1990 }
1991
1992 /* Initializing lower CP for HBM DMA QMAN */
1993 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
1994
1995 gaudi_init_dma_core(hdev, dma_id);
1996
1997 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
1998 }
1999
2000 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2001}
2002
2003static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2004 int qman_id, u64 qman_base_addr)
2005{
2006 u32 mtr_base_lo, mtr_base_hi;
2007 u32 so_base_lo, so_base_hi;
2008 u32 q_off, mme_id;
2009 u32 mme_qm_err_cfg;
2010
2011 mtr_base_lo = lower_32_bits(CFG_BASE +
2012 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2013 mtr_base_hi = upper_32_bits(CFG_BASE +
2014 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2015 so_base_lo = lower_32_bits(CFG_BASE +
2016 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2017 so_base_hi = upper_32_bits(CFG_BASE +
2018 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2019
2020 q_off = mme_offset + qman_id * 4;
2021
2022 if (qman_id < 4) {
2023 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2024 lower_32_bits(qman_base_addr));
2025 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2026 upper_32_bits(qman_base_addr));
2027
2028 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2029 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2030 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2031
2032 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2033 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2034 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2035 } else {
2036 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2037 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2038 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2039
2040 /* Configure RAZWI IRQ */
2041 mme_id = mme_offset /
2042 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2043
2044 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2045 if (hdev->stop_on_err) {
2046 mme_qm_err_cfg |=
2047 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2048 }
2049 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2050 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2051 lower_32_bits(CFG_BASE +
2052 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2053 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2054 upper_32_bits(CFG_BASE +
2055 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2056 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2057 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2058 mme_id);
2059
2060 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2061 QM_ARB_ERR_MSG_EN_MASK);
2062
2063 /* Increase ARB WDT to support streams architecture */
2064 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2065 GAUDI_ARB_WDT_TIMEOUT);
2066
2067 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2068 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2069 QMAN_INTERNAL_MAKE_TRUSTED);
2070 }
2071
2072 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2073 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2074 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2075 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2076}
2077
2078static void gaudi_init_mme_qmans(struct hl_device *hdev)
2079{
2080 struct gaudi_device *gaudi = hdev->asic_specific;
2081 struct gaudi_internal_qman_info *q;
2082 u64 qman_base_addr;
2083 u32 mme_offset;
2084 int i, internal_q_index;
2085
2086 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2087 return;
2088
2089 /*
2090 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2091 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2092 */
2093
2094 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2095
2096 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2097 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2098 q = &gaudi->internal_qmans[internal_q_index];
2099 qman_base_addr = (u64) q->pq_dma_addr;
2100 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2101 qman_base_addr);
2102 if (i == 3)
2103 mme_offset = 0;
2104 }
2105
2106 /* Initializing lower CP for MME QMANs */
2107 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2108 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2109 gaudi_init_mme_qman(hdev, 0, 4, 0);
2110
2111 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2112 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2113
2114 gaudi->hw_cap_initialized |= HW_CAP_MME;
2115}
2116
2117static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2118 int qman_id, u64 qman_base_addr)
2119{
2120 u32 mtr_base_lo, mtr_base_hi;
2121 u32 so_base_lo, so_base_hi;
2122 u32 q_off, tpc_id;
2123 u32 tpc_qm_err_cfg;
2124
2125 mtr_base_lo = lower_32_bits(CFG_BASE +
2126 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2127 mtr_base_hi = upper_32_bits(CFG_BASE +
2128 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2129 so_base_lo = lower_32_bits(CFG_BASE +
2130 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2131 so_base_hi = upper_32_bits(CFG_BASE +
2132 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2133
2134 q_off = tpc_offset + qman_id * 4;
2135
2136 if (qman_id < 4) {
2137 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2138 lower_32_bits(qman_base_addr));
2139 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2140 upper_32_bits(qman_base_addr));
2141
2142 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2143 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2144 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2145
2146 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2147 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2148 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2149 } else {
2150 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2151 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2152 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2153
2154 /* Configure RAZWI IRQ */
2155 tpc_id = tpc_offset /
2156 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2157
2158 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2159 if (hdev->stop_on_err) {
2160 tpc_qm_err_cfg |=
2161 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2162 }
2163
2164 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2165 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2166 lower_32_bits(CFG_BASE +
2167 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2168 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2169 upper_32_bits(CFG_BASE +
2170 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2171 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2172 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2173 tpc_id);
2174
2175 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2176 QM_ARB_ERR_MSG_EN_MASK);
2177
2178 /* Increase ARB WDT to support streams architecture */
2179 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2180 GAUDI_ARB_WDT_TIMEOUT);
2181
2182 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2183 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2184 QMAN_INTERNAL_MAKE_TRUSTED);
2185 }
2186
2187 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2188 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2189 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2190 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2191}
2192
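/*
 * Initialize the QMANs of all TPC engines: four upper-CP streams and a
 * lower CP per TPC, the sync manager base address, and finally enable
 * each TPC QMAN while marking it in hw_cap_initialized.
 */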
2193static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2194{
2195 struct gaudi_device *gaudi = hdev->asic_specific;
2196 struct gaudi_internal_qman_info *q;
2197 u64 qman_base_addr;
2198 u32 so_base_hi, tpc_offset = 0;
2199 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2200 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2201 int i, tpc_id, internal_q_index;
2202
2203 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2204 return;
2205
2206 so_base_hi = upper_32_bits(CFG_BASE +
2207 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2208
2209 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2210 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2211 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2212 tpc_id * QMAN_STREAMS + i;
2213 q = &gaudi->internal_qmans[internal_q_index];
2214 qman_base_addr = (u64) q->pq_dma_addr;
2215 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2216 qman_base_addr);
2217
2218 if (i == 3) {
2219 /* Initializing lower CP for TPC QMAN */
2220 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2221
2222 /* Enable the QMAN and TPC channel */
2223 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2224 QMAN_TPC_ENABLE);
2225 }
2226 }
2227
2228 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2229 so_base_hi);
2230
2231 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2232
2233 gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id);
2234 }
2235}
2236
2237static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2238{
2239 struct gaudi_device *gaudi = hdev->asic_specific;
2240
2241 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2242 return;
2243
2244 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2245 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2246 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2247}
2248
2249static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2250{
2251 struct gaudi_device *gaudi = hdev->asic_specific;
2252
2253 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2254 return;
2255
2256 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2257 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2258 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2259 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2260 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2261}
2262
2263static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2264{
2265 struct gaudi_device *gaudi = hdev->asic_specific;
2266
2267 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2268 return;
2269
2270 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2271 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2272}
2273
2274static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2275{
2276 struct gaudi_device *gaudi = hdev->asic_specific;
2277 u32 tpc_offset = 0;
2278 int tpc_id;
2279
2280 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2281 return;
2282
2283 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2284 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2285 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2286 }
2287}
2288
2289static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2290{
2291 struct gaudi_device *gaudi = hdev->asic_specific;
2292
2293 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2294 return;
2295
2296 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2297 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2298 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2299 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2300}
2301
2302static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2303{
2304 struct gaudi_device *gaudi = hdev->asic_specific;
2305
2306 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2307 return;
2308
2309 /* Stop CPs of HBM DMA QMANs */
2310
2311 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2312 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2313 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2314 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2315 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2316}
2317
2318static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2319{
2320 struct gaudi_device *gaudi = hdev->asic_specific;
2321
2322 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2323 return;
2324
2325 /* Stop CPs of MME QMANs */
2326 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2327 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2328}
2329
2330static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2331{
2332 struct gaudi_device *gaudi = hdev->asic_specific;
2333
2334 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2335 return;
2336
2337 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2338 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2339 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2340 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2341 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2342 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2343 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2344 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2345}
2346
2347static void gaudi_pci_dma_stall(struct hl_device *hdev)
2348{
2349 struct gaudi_device *gaudi = hdev->asic_specific;
2350
2351 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2352 return;
2353
2354 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2355 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2356 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2357}
2358
2359static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2360{
2361 struct gaudi_device *gaudi = hdev->asic_specific;
2362
2363 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2364 return;
2365
2366 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2367 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2368 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2369 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2370 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2371}
2372
2373static void gaudi_mme_stall(struct hl_device *hdev)
2374{
2375 struct gaudi_device *gaudi = hdev->asic_specific;
2376
2377 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2378 return;
2379
2380 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2381 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2382 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2383 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2384 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2385 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2386 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2387 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2388 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2389 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2390 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2391 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2392 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2393 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2394 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2395 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2396 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2397}
2398
2399static void gaudi_tpc_stall(struct hl_device *hdev)
2400{
2401 struct gaudi_device *gaudi = hdev->asic_specific;
2402
2403 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2404 return;
2405
2406 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2407 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2408 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2409 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2410 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2411 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2412 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2413 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2414}
2415
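/*
 * Enable clock gating on the DMA, MME and TPC QMANs, unless clock gating
 * was disabled by the user or a debug session is in progress.
 */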
2416static void gaudi_enable_clock_gating(struct hl_device *hdev)
2417{
2418 struct gaudi_device *gaudi = hdev->asic_specific;
2419 u32 qman_offset;
2420 int i;
2421
2422 if (!hdev->clock_gating)
2423 return;
2424
2425 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)
2426 return;
2427
2428 /* Don't enable clock gating during a debug session, as it may
2429 * interfere
2430 */
2431 if (hdev->in_debug)
2432 return;
2433
2434 for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2435 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2436 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2437 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2438 QMAN_UPPER_CP_CGM_PWR_GATE_EN);
2439 }
2440
2441 for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2442 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2443 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2444 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2445 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2446 }
2447
2448 WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2449 WREG32(mmMME0_QM_CGM_CFG,
2450 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2451 WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2452 WREG32(mmMME2_QM_CGM_CFG,
2453 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2454
2455 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2456 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2457 QMAN_CGM1_PWR_GATE_EN);
2458 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2459 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2460
2461 qman_offset += TPC_QMAN_OFFSET;
2462 }
2463
2464 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2465}
2466
2467static void gaudi_disable_clock_gating(struct hl_device *hdev)
2468{
2469 struct gaudi_device *gaudi = hdev->asic_specific;
2470 u32 qman_offset;
2471 int i;
2472
2473 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2474 return;
2475
2476 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2477 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2478 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2479
2480 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2481 }
2482
2483 WREG32(mmMME0_QM_CGM_CFG, 0);
2484 WREG32(mmMME0_QM_CGM_CFG1, 0);
2485 WREG32(mmMME2_QM_CGM_CFG, 0);
2486 WREG32(mmMME2_QM_CGM_CFG1, 0);
2487
2488 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2489 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2490 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2491
2492 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2493 }
2494
2495 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2496}
2497
2498static void gaudi_enable_timestamp(struct hl_device *hdev)
2499{
2500 /* Disable the timestamp counter */
2501 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2502
2503 /* Zero the lower/upper parts of the 64-bit counter */
2504 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2505 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2506
2507 /* Enable the counter */
2508 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2509}
2510
2511static void gaudi_disable_timestamp(struct hl_device *hdev)
2512{
2513 /* Disable the timestamp counter */
2514 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2515}
2516
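/*
 * Halt all engines in an orderly fashion: stop the device CPU on hard
 * reset, stop the QMAN CPs, disable clock gating, stall the engines,
 * then disable the QMANs, the timestamp counter and the interrupts.
 */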
2517static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2518{
2519 u32 wait_timeout_ms, cpu_timeout_ms;
2520
2521 dev_info(hdev->dev,
2522 "Halting compute engines and disabling interrupts\n");
2523
2524 if (hdev->pldm) {
2525 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2526 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2527 } else {
2528 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2529 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
2530 }
2531
2532 if (hard_reset) {
2533 /*
2534 * The state of the CPU is unknown, so make sure it is stopped by
2535 * any means necessary
2536 */
2537 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
2538 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2539 GAUDI_EVENT_HALT_MACHINE);
2540 msleep(cpu_timeout_ms);
2541 }
2542
2543 gaudi_stop_mme_qmans(hdev);
2544 gaudi_stop_tpc_qmans(hdev);
2545 gaudi_stop_hbm_dma_qmans(hdev);
2546 gaudi_stop_pci_dma_qmans(hdev);
2547
2548 gaudi_disable_clock_gating(hdev);
2549
2550 msleep(wait_timeout_ms);
2551
2552 gaudi_pci_dma_stall(hdev);
2553 gaudi_hbm_dma_stall(hdev);
2554 gaudi_tpc_stall(hdev);
2555 gaudi_mme_stall(hdev);
2556
2557 msleep(wait_timeout_ms);
2558
2559 gaudi_disable_mme_qmans(hdev);
2560 gaudi_disable_tpc_qmans(hdev);
2561 gaudi_disable_hbm_dma_qmans(hdev);
2562 gaudi_disable_pci_dma_qmans(hdev);
2563
2564 gaudi_disable_timestamp(hdev);
2565
2566 if (hard_reset)
2567 gaudi_disable_msi(hdev);
2568 else
2569 gaudi_sync_irqs(hdev);
2570}
2571
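/*
 * Initialize the device MMU: program the hop-0 address of every ASID,
 * set up the cache invalidation management page, invalidate the MMU
 * cache and enable the MMU.
 */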
2572static int gaudi_mmu_init(struct hl_device *hdev)
2573{
2574 struct asic_fixed_properties *prop = &hdev->asic_prop;
2575 struct gaudi_device *gaudi = hdev->asic_specific;
2576 u64 hop0_addr;
2577 int rc, i;
2578
2579 if (!hdev->mmu_enable)
2580 return 0;
2581
2582 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2583 return 0;
2584
2585 hdev->dram_supports_virtual_memory = false;
2586
2587 for (i = 0 ; i < prop->max_asid ; i++) {
2588 hop0_addr = prop->mmu_pgt_addr +
2589 (i * prop->mmu_hop_table_size);
2590
2591 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2592 if (rc) {
2593 dev_err(hdev->dev,
2594 "failed to set hop0 addr for asid %d\n", i);
2595 goto err;
2596 }
2597 }
2598
2599 /* init MMU cache manage page */
2600 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2601 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2602
2603 hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2604 VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2605
2606 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2607 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2608
2609 WREG32(mmSTLB_HOP_CONFIGURATION,
2610 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2611
2612 /*
2613 * The H/W expects the first PI after init to be 1. After wraparound
2614 * we'll write 0.
2615 */
2616 gaudi->mmu_cache_inv_pi = 1;
2617
2618 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2619
2620 return 0;
2621
2622err:
2623 return rc;
2624}
2625
2626static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2627{
2628 void __iomem *dst;
2629
2630 /* HBM scrambler must be initialized before pushing F/W to HBM */
2631 gaudi_init_scrambler_hbm(hdev);
2632
2633 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2634
2635 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2636}
2637
2638static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2639{
2640 void __iomem *dst;
2641
2642 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2643
2644 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2645}
2646
2647static void gaudi_read_device_fw_version(struct hl_device *hdev,
2648 enum hl_fw_component fwc)
2649{
2650 const char *name;
2651 u32 ver_off;
2652 char *dest;
2653
2654 switch (fwc) {
2655 case FW_COMP_UBOOT:
2656 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2657 dest = hdev->asic_prop.uboot_ver;
2658 name = "U-Boot";
2659 break;
2660 case FW_COMP_PREBOOT:
2661 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2662 dest = hdev->asic_prop.preboot_ver;
2663 name = "Preboot";
2664 break;
2665 default:
2666 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2667 return;
2668 }
2669
2670 ver_off &= ~((u32)SRAM_BASE_ADDR);
2671
2672 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2673 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2674 VERSION_MAX_LEN);
2675 } else {
2676 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2677 name, ver_off);
2678 strcpy(dest, "unavailable");
2679 }
2680}
2681
2682static int gaudi_init_cpu(struct hl_device *hdev)
2683{
2684 struct gaudi_device *gaudi = hdev->asic_specific;
2685 int rc;
2686
2687 if (!hdev->cpu_enable)
2688 return 0;
2689
2690 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2691 return 0;
2692
2693 /*
2694 * The device CPU works with 40-bit addresses.
2695 * This register sets the extension to 50 bits.
2696 */
2697 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2698
2699 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2700 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2701 mmCPU_CMD_STATUS_TO_HOST,
2702 mmCPU_BOOT_ERR0,
2703 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2704 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2705
2706 if (rc)
2707 return rc;
2708
2709 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2710
2711 return 0;
2712}
2713
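/*
 * Pass the addresses of the CPU PQ, event queue and CPU-accessible memory
 * region to the device CPU and wait until it reports that the queues are
 * ready.
 */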
2714static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2715{
2716 struct gaudi_device *gaudi = hdev->asic_specific;
2717 struct hl_eq *eq;
2718 u32 status;
2719 struct hl_hw_queue *cpu_pq =
2720 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2721 int err;
2722
2723 if (!hdev->cpu_queues_enable)
2724 return 0;
2725
2726 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2727 return 0;
2728
2729 eq = &hdev->event_queue;
2730
2731 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2732 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2733
2734 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2735 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2736
2737 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2738 lower_32_bits(hdev->cpu_accessible_dma_address));
2739 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2740 upper_32_bits(hdev->cpu_accessible_dma_address));
2741
2742 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2743 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2744 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2745
2746 /* Used for EQ CI */
2747 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2748
2749 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2750
2751 if (gaudi->multi_msi_mode)
2752 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2753 else
2754 WREG32(mmCPU_IF_QUEUE_INIT,
2755 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2756
2757 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2758
2759 err = hl_poll_timeout(
2760 hdev,
2761 mmCPU_IF_QUEUE_INIT,
2762 status,
2763 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2764 1000,
2765 cpu_timeout);
2766
2767 if (err) {
2768 dev_err(hdev->dev,
2769 "Failed to communicate with ARM CPU (ArmCP timeout)\n");
2770 return -EIO;
2771 }
2772
2773 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2774 return 0;
2775}
2776
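/*
 * Early configuration that must run before any other H/W initialization:
 * mark the device state as dirty, secure the PCI BAR access, configure
 * the AXI drain and program the soft/hard reset register masks.
 */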
2777static void gaudi_pre_hw_init(struct hl_device *hdev)
2778{
2779 /* Perform read from the device to make sure device is up */
2780 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2781
2782 /*
2783 * Let's mark in the H/W that we have reached this point. We check
2784 * this value in the reset_before_init function to understand whether
2785 * we need to reset the chip before doing H/W init. This register is
2786 * cleared by the H/W upon H/W reset
2787 */
2788 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2789
2790 /* Set the access through PCI bars (Linux driver only) as secured */
2791 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2792 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2793
2794 /* Perform read to flush the waiting writes to ensure configuration
2795 * was set in the device
2796 */
2797 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2798
2799 if (hdev->axi_drain) {
2800 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2801 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2802 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2803 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2804
2805 /* Perform read to flush the DRAIN cfg */
2806 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2807 } else {
2808 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2809 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2810
2811 /* Perform read to flush the DRAIN cfg */
2812 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2813 }
2814
2815 /* Configure the reset registers. Must be done as early as possible
2816 * in case we fail during H/W initialization
2817 */
2818 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2819 (CFG_RST_H_DMA_MASK |
2820 CFG_RST_H_MME_MASK |
2821 CFG_RST_H_SM_MASK |
2822 CFG_RST_H_TPC_MASK));
2823
2824 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2825
2826 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2827 (CFG_RST_H_HBM_MASK |
2828 CFG_RST_H_TPC_MASK |
2829 CFG_RST_H_NIC_MASK |
2830 CFG_RST_H_SM_MASK |
2831 CFG_RST_H_DMA_MASK |
2832 CFG_RST_H_MME_MASK |
2833 CFG_RST_H_CPU_MASK |
2834 CFG_RST_H_MMU_MASK));
2835
2836 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2837 (CFG_RST_L_IF_MASK |
2838 CFG_RST_L_PSOC_MASK |
2839 CFG_RST_L_TPC_MASK));
2840}
2841
2842static int gaudi_hw_init(struct hl_device *hdev)
2843{
2844 int rc;
2845
2846 dev_info(hdev->dev, "Starting initialization of H/W\n");
2847
2848 gaudi_pre_hw_init(hdev);
2849
2850 gaudi_init_pci_dma_qmans(hdev);
2851
2852 gaudi_init_hbm_dma_qmans(hdev);
2853
2854 /*
2855 * Before pushing u-boot/Linux to the device, the HBM BAR must be set
2856 * to the DRAM base address
2857 */
2858 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2859 dev_err(hdev->dev,
2860 "failed to map HBM bar to DRAM base address\n");
2861 return -EIO;
2862 }
2863
2864 rc = gaudi_init_cpu(hdev);
2865 if (rc) {
2866 dev_err(hdev->dev, "failed to initialize CPU\n");
2867 return rc;
2868 }
2869
2870 /* SRAM scrambler must be initialized after CPU is running from HBM */
2871 gaudi_init_scrambler_sram(hdev);
2872
2873 /* This is here just in case we are working without CPU */
2874 gaudi_init_scrambler_hbm(hdev);
2875
2876 gaudi_init_golden_registers(hdev);
2877
2878 rc = gaudi_mmu_init(hdev);
2879 if (rc)
2880 return rc;
2881
2882 gaudi_init_security(hdev);
2883
2884 gaudi_init_mme_qmans(hdev);
2885
2886 gaudi_init_tpc_qmans(hdev);
2887
2888 gaudi_enable_clock_gating(hdev);
2889
2890 gaudi_enable_timestamp(hdev);
2891
2892 /* MSI must be enabled before CPU queues are initialized */
2893 rc = gaudi_enable_msi(hdev);
2894 if (rc)
2895 goto disable_queues;
2896
2897 /* must be called after MSI was enabled */
2898 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2899 if (rc) {
2900 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2901 rc);
2902 goto disable_msi;
2903 }
2904
2905 /* Perform read from the device to flush all configuration */
2906 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2907
2908 return 0;
2909
2910disable_msi:
2911 gaudi_disable_msi(hdev);
2912disable_queues:
2913 gaudi_disable_mme_qmans(hdev);
2914 gaudi_disable_pci_dma_qmans(hdev);
2915
2916 return rc;
2917}
2918
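/*
 * Execute a hard or soft reset of the ASIC and clear the relevant
 * hw_cap_initialized bits so the affected blocks are re-initialized on
 * the next H/W init.
 */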
2919static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
2920{
2921 struct gaudi_device *gaudi = hdev->asic_specific;
2922 u32 status, reset_timeout_ms, boot_strap = 0;
2923
2924 if (hdev->pldm) {
2925 if (hard_reset)
2926 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
2927 else
2928 reset_timeout_ms = GAUDI_PLDM_SRESET_TIMEOUT_MSEC;
2929 } else {
2930 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
2931 }
2932
2933 if (hard_reset) {
2934 /* Tell ASIC not to re-initialize PCIe */
2935 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
2936
2937 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
2938 /* H/W bug WA:
2939 * rdata[31:0] = strap_read_val;
2940 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
2941 */
2942 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
2943 (boot_strap & 0x001FFFFF));
2944 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
2945
2946 /* Restart BTL/BLR upon hard-reset */
2947 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
2948
2949 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
2950 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
2951 dev_info(hdev->dev,
2952 "Issued HARD reset command, going to wait %dms\n",
2953 reset_timeout_ms);
2954 } else {
2955 /* Don't restart BTL/BLR upon soft-reset */
2956 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 0);
2957
2958 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST,
2959 1 << PSOC_GLOBAL_CONF_SOFT_RST_IND_SHIFT);
2960 dev_info(hdev->dev,
2961 "Issued SOFT reset command, going to wait %dms\n",
2962 reset_timeout_ms);
2963 }
2964
2965 /*
2966 * After hard reset, we can't poll the BTM_FSM register because the PSOC
2967 * itself is in reset. Need to wait until the reset is deasserted
2968 */
2969 msleep(reset_timeout_ms);
2970
2971 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2972 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2973 dev_err(hdev->dev,
2974 "Timeout while waiting for device to reset 0x%x\n",
2975 status);
2976
2977 if (!hard_reset) {
2978 gaudi->hw_cap_initialized &= ~(HW_CAP_PCI_DMA | HW_CAP_MME |
2979 HW_CAP_TPC_MASK |
2980 HW_CAP_HBM_DMA);
2981
2982 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2983 GAUDI_EVENT_SOFT_RESET);
2984 return;
2985 }
2986
2987 /* We continue here only for hard-reset */
2988
2989 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
2990
2991 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2992 HW_CAP_HBM | HW_CAP_PCI_DMA |
2993 HW_CAP_MME | HW_CAP_TPC_MASK |
2994 HW_CAP_HBM_DMA | HW_CAP_PLL |
2995 HW_CAP_MMU |
2996 HW_CAP_SRAM_SCRAMBLER |
2997 HW_CAP_HBM_SCRAMBLER);
2998 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
2999}
3000
3001static int gaudi_suspend(struct hl_device *hdev)
3002{
3003 int rc;
3004
3005 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
3006 if (rc)
3007 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3008
3009 return rc;
3010}
3011
3012static int gaudi_resume(struct hl_device *hdev)
3013{
3014 return gaudi_init_iatu(hdev);
3015}
3016
3017static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3018 u64 kaddress, phys_addr_t paddress, u32 size)
3019{
3020 int rc;
3021
3022 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3023 VM_DONTCOPY | VM_NORESERVE;
3024
3025 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
3026 size, vma->vm_page_prot);
3027 if (rc)
3028 dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);
3029
3030 return rc;
3031}
3032
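/*
 * Ring the doorbell of a H/W queue by writing the new PI to the matching
 * PQ_PI register. For the CPU queue, also notify the device CPU through
 * the GIC.
 */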
3033static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3034{
3035 struct gaudi_device *gaudi = hdev->asic_specific;
3036 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3037 int dma_id;
3038 bool invalid_queue = false;
3039
3040 switch (hw_queue_id) {
3041 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3042 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3043 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3044 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3045 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3046 break;
3047
3048 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3049 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3050 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3051 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3052 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3053 break;
3054
3055 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3056 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3057 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3058 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3059 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3060 break;
3061
3062 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3063 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3064 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3065 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3066 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3067 break;
3068
3069 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3070 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3071 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3072 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3073 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3074 break;
3075
3076 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3077 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3078 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3079 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3080 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3081 break;
3082
3083 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3084 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3085 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3086 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3087 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3088 break;
3089
3090 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3091 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3092 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3093 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3094 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3095 break;
3096
3097 case GAUDI_QUEUE_ID_CPU_PQ:
3098 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3099 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3100 else
3101 invalid_queue = true;
3102 break;
3103
3104 case GAUDI_QUEUE_ID_MME_0_0:
3105 db_reg_offset = mmMME2_QM_PQ_PI_0;
3106 break;
3107
3108 case GAUDI_QUEUE_ID_MME_0_1:
3109 db_reg_offset = mmMME2_QM_PQ_PI_1;
3110 break;
3111
3112 case GAUDI_QUEUE_ID_MME_0_2:
3113 db_reg_offset = mmMME2_QM_PQ_PI_2;
3114 break;
3115
3116 case GAUDI_QUEUE_ID_MME_0_3:
3117 db_reg_offset = mmMME2_QM_PQ_PI_3;
3118 break;
3119
3120 case GAUDI_QUEUE_ID_MME_1_0:
3121 db_reg_offset = mmMME0_QM_PQ_PI_0;
3122 break;
3123
3124 case GAUDI_QUEUE_ID_MME_1_1:
3125 db_reg_offset = mmMME0_QM_PQ_PI_1;
3126 break;
3127
3128 case GAUDI_QUEUE_ID_MME_1_2:
3129 db_reg_offset = mmMME0_QM_PQ_PI_2;
3130 break;
3131
3132 case GAUDI_QUEUE_ID_MME_1_3:
3133 db_reg_offset = mmMME0_QM_PQ_PI_3;
3134 break;
3135
3136 case GAUDI_QUEUE_ID_TPC_0_0:
3137 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3138 break;
3139
3140 case GAUDI_QUEUE_ID_TPC_0_1:
3141 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3142 break;
3143
3144 case GAUDI_QUEUE_ID_TPC_0_2:
3145 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3146 break;
3147
3148 case GAUDI_QUEUE_ID_TPC_0_3:
3149 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3150 break;
3151
3152 case GAUDI_QUEUE_ID_TPC_1_0:
3153 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3154 break;
3155
3156 case GAUDI_QUEUE_ID_TPC_1_1:
3157 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3158 break;
3159
3160 case GAUDI_QUEUE_ID_TPC_1_2:
3161 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3162 break;
3163
3164 case GAUDI_QUEUE_ID_TPC_1_3:
3165 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3166 break;
3167
3168 case GAUDI_QUEUE_ID_TPC_2_0:
3169 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3170 break;
3171
3172 case GAUDI_QUEUE_ID_TPC_2_1:
3173 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3174 break;
3175
3176 case GAUDI_QUEUE_ID_TPC_2_2:
3177 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3178 break;
3179
3180 case GAUDI_QUEUE_ID_TPC_2_3:
3181 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3182 break;
3183
3184 case GAUDI_QUEUE_ID_TPC_3_0:
3185 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3186 break;
3187
3188 case GAUDI_QUEUE_ID_TPC_3_1:
3189 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3190 break;
3191
3192 case GAUDI_QUEUE_ID_TPC_3_2:
3193 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3194 break;
3195
3196 case GAUDI_QUEUE_ID_TPC_3_3:
3197 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3198 break;
3199
3200 case GAUDI_QUEUE_ID_TPC_4_0:
3201 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3202 break;
3203
3204 case GAUDI_QUEUE_ID_TPC_4_1:
3205 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3206 break;
3207
3208 case GAUDI_QUEUE_ID_TPC_4_2:
3209 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3210 break;
3211
3212 case GAUDI_QUEUE_ID_TPC_4_3:
3213 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3214 break;
3215
3216 case GAUDI_QUEUE_ID_TPC_5_0:
3217 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3218 break;
3219
3220 case GAUDI_QUEUE_ID_TPC_5_1:
3221 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3222 break;
3223
3224 case GAUDI_QUEUE_ID_TPC_5_2:
3225 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3226 break;
3227
3228 case GAUDI_QUEUE_ID_TPC_5_3:
3229 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3230 break;
3231
3232 case GAUDI_QUEUE_ID_TPC_6_0:
3233 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3234 break;
3235
3236 case GAUDI_QUEUE_ID_TPC_6_1:
3237 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3238 break;
3239
3240 case GAUDI_QUEUE_ID_TPC_6_2:
3241 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3242 break;
3243
3244 case GAUDI_QUEUE_ID_TPC_6_3:
3245 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3246 break;
3247
3248 case GAUDI_QUEUE_ID_TPC_7_0:
3249 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3250 break;
3251
3252 case GAUDI_QUEUE_ID_TPC_7_1:
3253 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3254 break;
3255
3256 case GAUDI_QUEUE_ID_TPC_7_2:
3257 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3258 break;
3259
3260 case GAUDI_QUEUE_ID_TPC_7_3:
3261 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3262 break;
3263
3264 default:
3265 invalid_queue = true;
3266 }
3267
3268 if (invalid_queue) {
3269 /* Should never get here */
3270 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3271 hw_queue_id);
3272 return;
3273 }
3274
3275 db_value = pi;
3276
3277 /* ring the doorbell */
3278 WREG32(db_reg_offset, db_value);
3279
3280 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3281 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3282 GAUDI_EVENT_PI_UPDATE);
3283}
3284
3285static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3286 struct hl_bd *bd)
3287{
3288 __le64 *pbd = (__le64 *) bd;
3289
3290 /* The QMANs are on host memory, so a simple copy suffices */
3291 pqe[0] = pbd[0];
3292 pqe[1] = pbd[1];
3293}
3294
3295static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3296 dma_addr_t *dma_handle, gfp_t flags)
3297{
3298 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3299 dma_handle, flags);
3300
3301 /* Shift to the device's base physical address of host memory */
3302 if (kernel_addr)
3303 *dma_handle += HOST_PHYS_BASE;
3304
3305 return kernel_addr;
3306}
3307
3308static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3309 void *cpu_addr, dma_addr_t dma_handle)
3310{
3311 /* Cancel the device's base physical address of host memory */
3312 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3313
3314 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3315}
3316
3317static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3318 u32 queue_id, dma_addr_t *dma_handle,
3319 u16 *queue_len)
3320{
3321 struct gaudi_device *gaudi = hdev->asic_specific;
3322 struct gaudi_internal_qman_info *q;
3323
3324 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3325 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3326 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3327 return NULL;
3328 }
3329
3330 q = &gaudi->internal_qmans[queue_id];
3331 *dma_handle = q->pq_dma_addr;
3332 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3333
3334 return q->pq_kernel_addr;
3335}
3336
3337static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3338 u16 len, u32 timeout, long *result)
3339{
3340 struct gaudi_device *gaudi = hdev->asic_specific;
3341
3342 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3343 if (result)
3344 *result = 0;
3345 return 0;
3346 }
3347
3348 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3349 timeout, result);
3350}
3351
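/*
 * Test an external queue by sending a MSG_PROT packet that writes a fence
 * value to host memory, then polling until the value arrives or the
 * timeout expires.
 */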
3352static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3353{
3354 struct packet_msg_prot *fence_pkt;
3355 dma_addr_t pkt_dma_addr;
3356 u32 fence_val, tmp, timeout_usec;
3357 dma_addr_t fence_dma_addr;
3358 u32 *fence_ptr;
3359 int rc;
3360
3361 if (hdev->pldm)
3362 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3363 else
3364 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3365
3366 fence_val = GAUDI_QMAN0_FENCE_VAL;
3367
3368 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3369 &fence_dma_addr);
3370 if (!fence_ptr) {
3371 dev_err(hdev->dev,
3372 "Failed to allocate memory for queue testing\n");
3373 return -ENOMEM;
3374 }
3375
3376 *fence_ptr = 0;
3377
3378 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3379 sizeof(struct packet_msg_prot),
3380 GFP_KERNEL, &pkt_dma_addr);
3381 if (!fence_pkt) {
3382 dev_err(hdev->dev,
3383 "Failed to allocate packet for queue testing\n");
3384 rc = -ENOMEM;
3385 goto free_fence_ptr;
3386 }
3387
3388 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
3389 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
3390 (1 << GAUDI_PKT_CTL_MB_SHIFT);
3391 fence_pkt->ctl = cpu_to_le32(tmp);
3392 fence_pkt->value = cpu_to_le32(fence_val);
3393 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3394
3395 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3396 sizeof(struct packet_msg_prot),
3397 pkt_dma_addr);
3398 if (rc) {
3399 dev_err(hdev->dev,
3400 "Failed to send fence packet\n");
3401 goto free_pkt;
3402 }
3403
3404 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3405 1000, timeout_usec, true);
3406
3407 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3408
3409 if (rc == -ETIMEDOUT) {
3410 dev_err(hdev->dev,
3411 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3412 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3413 rc = -EIO;
3414 }
3415
3416free_pkt:
3417 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3418 pkt_dma_addr);
3419free_fence_ptr:
3420 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3421 fence_dma_addr);
3422 return rc;
3423}
3424
3425static int gaudi_test_cpu_queue(struct hl_device *hdev)
3426{
3427 struct gaudi_device *gaudi = hdev->asic_specific;
3428
3429	/*
3430	 * Check the capability here because send_cpu_message() won't update
3431	 * the result value when the CPU queue capability is missing.
3432	 */
3433 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3434 return 0;
3435
3436 return hl_fw_test_cpu_queue(hdev);
3437}
3438
3439static int gaudi_test_queues(struct hl_device *hdev)
3440{
3441 int i, rc, ret_val = 0;
3442
3443 for (i = 0 ; i < HL_MAX_QUEUES ; i++) {
3444 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3445 rc = gaudi_test_queue(hdev, i);
3446 if (rc)
3447 ret_val = -EINVAL;
3448 }
3449 }
3450
3451 rc = gaudi_test_cpu_queue(hdev);
3452 if (rc)
3453 ret_val = -EINVAL;
3454
3455 return ret_val;
3456}
3457
3458static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3459 gfp_t mem_flags, dma_addr_t *dma_handle)
3460{
3461 void *kernel_addr;
3462
3463 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3464 return NULL;
3465
3466 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3467
3468 /* Shift to the device's base physical address of host memory */
3469 if (kernel_addr)
3470 *dma_handle += HOST_PHYS_BASE;
3471
3472 return kernel_addr;
3473}
3474
3475static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3476 dma_addr_t dma_addr)
3477{
3478 /* Cancel the device's base physical address of host memory */
3479 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3480
3481 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3482}
3483
3484static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3485 size_t size, dma_addr_t *dma_handle)
3486{
3487 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3488}
3489
3490static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3491 size_t size, void *vaddr)
3492{
3493 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3494}
3495
3496static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3497 int nents, enum dma_data_direction dir)
3498{
3499 struct scatterlist *sg;
3500 int i;
3501
3502 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3503 return -ENOMEM;
3504
3505 /* Shift to the device's base physical address of host memory */
3506 for_each_sg(sgl, sg, nents, i)
3507 sg->dma_address += HOST_PHYS_BASE;
3508
3509 return 0;
3510}
3511
3512static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3513 int nents, enum dma_data_direction dir)
3514{
3515 struct scatterlist *sg;
3516 int i;
3517
3518 /* Cancel the device's base physical address of host memory */
3519 for_each_sg(sgl, sg, nents, i)
3520 sg->dma_address -= HOST_PHYS_BASE;
3521
3522 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3523}
3524
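/*
 * Compute the worst-case size, in bytes, of the LIN_DMA packets that will be
 * generated for this SG table. DMA-contiguous entries are merged, up to
 * DMA_MAX_TRANSFER_SIZE per descriptor, before being counted.
 */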
3525static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3526 struct sg_table *sgt)
3527{
3528 struct scatterlist *sg, *sg_next_iter;
3529 u32 count, dma_desc_cnt;
3530 u64 len, len_next;
3531 dma_addr_t addr, addr_next;
3532
3533 dma_desc_cnt = 0;
3534
3535 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3536
3537 len = sg_dma_len(sg);
3538 addr = sg_dma_address(sg);
3539
3540 if (len == 0)
3541 break;
3542
3543 while ((count + 1) < sgt->nents) {
3544 sg_next_iter = sg_next(sg);
3545 len_next = sg_dma_len(sg_next_iter);
3546 addr_next = sg_dma_address(sg_next_iter);
3547
3548 if (len_next == 0)
3549 break;
3550
3551 if ((addr + len == addr_next) &&
3552 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3553 len += len_next;
3554 count++;
3555 sg = sg_next_iter;
3556 } else {
3557 break;
3558 }
3559 }
3560
3561 dma_desc_cnt++;
3562 }
3563
3564 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3565}
3566
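/*
 * Pin the host buffer referenced by a user LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map it and account for the descriptors
 * that patching this packet will add to the patched CB.
 */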
3567static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3568 struct hl_cs_parser *parser,
3569 struct packet_lin_dma *user_dma_pkt,
3570 u64 addr, enum dma_data_direction dir)
3571{
3572 struct hl_userptr *userptr;
3573 int rc;
3574
3575 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3576 parser->job_userptr_list, &userptr))
3577 goto already_pinned;
3578
3579 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3580 if (!userptr)
3581 return -ENOMEM;
3582
3583 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3584 userptr);
3585 if (rc)
3586 goto free_userptr;
3587
3588 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3589
3590 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3591 userptr->sgt->nents, dir);
3592 if (rc) {
3593 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3594 goto unpin_memory;
3595 }
3596
3597 userptr->dma_mapped = true;
3598 userptr->dir = dir;
3599
3600already_pinned:
3601 parser->patched_cb_size +=
3602 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3603
3604 return 0;
3605
3606unpin_memory:
3607 hl_unpin_host_memory(hdev, userptr);
3608free_userptr:
3609 kfree(userptr);
3610 return rc;
3611}
3612
3613static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3614 struct hl_cs_parser *parser,
3615 struct packet_lin_dma *user_dma_pkt,
3616 bool src_in_host)
3617{
3618 enum dma_data_direction dir;
3619 bool skip_host_mem_pin = false, user_memset;
3620 u64 addr;
3621 int rc = 0;
3622
3623 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3624 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3625 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3626
3627 if (src_in_host) {
3628 if (user_memset)
3629 skip_host_mem_pin = true;
3630
3631 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3632 dir = DMA_TO_DEVICE;
3633 addr = le64_to_cpu(user_dma_pkt->src_addr);
3634 } else {
3635 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3636 dir = DMA_FROM_DEVICE;
3637 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3638 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3639 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3640 }
3641
3642 if (skip_host_mem_pin)
3643 parser->patched_cb_size += sizeof(*user_dma_pkt);
3644 else
3645 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3646 addr, dir);
3647
3648 return rc;
3649}
3650
3651static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3652 struct hl_cs_parser *parser,
3653 struct packet_lin_dma *user_dma_pkt)
3654{
3655 bool src_in_host = false;
3656 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3657 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3658 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3659
3660 dev_dbg(hdev->dev, "DMA packet details:\n");
3661 dev_dbg(hdev->dev, "source == 0x%llx\n",
3662 le64_to_cpu(user_dma_pkt->src_addr));
3663 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3664 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3665
3666 /*
3667 * Special handling for DMA with size 0. Bypass all validations
3668 * because no transactions will be done except for WR_COMP, which
3669 * is not a security issue
3670 */
3671 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3672 parser->patched_cb_size += sizeof(*user_dma_pkt);
3673 return 0;
3674 }
3675
3676 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3677 src_in_host = true;
3678
3679 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3680 src_in_host);
3681}
3682
3683static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3684 struct hl_cs_parser *parser,
3685 struct packet_load_and_exe *user_pkt)
3686{
3687 u32 cfg;
3688
3689 cfg = le32_to_cpu(user_pkt->cfg);
3690
3691 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3692 dev_err(hdev->dev,
3693 "User not allowed to use Load and Execute\n");
3694 return -EPERM;
3695 }
3696
3697 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3698
3699 return 0;
3700}
3701
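/*
 * First pass over the user CB: walk it packet by packet, reject packets the
 * user is not allowed to submit (MSG_PROT, CP_DMA, STOP, Load-and-Execute
 * with a destination) and accumulate the size of the patched CB, including
 * room for the two MSG_PROT packets the driver appends at the end.
 */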
3702static int gaudi_validate_cb(struct hl_device *hdev,
3703 struct hl_cs_parser *parser, bool is_mmu)
3704{
3705 u32 cb_parsed_length = 0;
3706 int rc = 0;
3707
3708 parser->patched_cb_size = 0;
3709
3710	/* user_cb_size is greater than 0, so the loop always executes */
3711 while (cb_parsed_length < parser->user_cb_size) {
3712 enum packet_id pkt_id;
3713 u16 pkt_size;
3714 struct gaudi_packet *user_pkt;
3715
3716 user_pkt = (struct gaudi_packet *) (uintptr_t)
3717 (parser->user_cb->kernel_address + cb_parsed_length);
3718
3719 pkt_id = (enum packet_id) (
3720 (le64_to_cpu(user_pkt->header) &
3721 PACKET_HEADER_PACKET_ID_MASK) >>
3722 PACKET_HEADER_PACKET_ID_SHIFT);
3723
3724 pkt_size = gaudi_packet_sizes[pkt_id];
3725 cb_parsed_length += pkt_size;
3726 if (cb_parsed_length > parser->user_cb_size) {
3727 dev_err(hdev->dev,
3728 "packet 0x%x is out of CB boundary\n", pkt_id);
3729 rc = -EINVAL;
3730 break;
3731 }
3732
3733 switch (pkt_id) {
3734 case PACKET_MSG_PROT:
3735 dev_err(hdev->dev,
3736 "User not allowed to use MSG_PROT\n");
3737 rc = -EPERM;
3738 break;
3739
3740 case PACKET_CP_DMA:
3741 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3742 rc = -EPERM;
3743 break;
3744
3745 case PACKET_STOP:
3746 dev_err(hdev->dev, "User not allowed to use STOP\n");
3747 rc = -EPERM;
3748 break;
3749
3750		case PACKET_LOAD_AND_EXE:
3751 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3752 (struct packet_load_and_exe *) user_pkt);
3753 break;
3754
3755		case PACKET_LIN_DMA:
3756 parser->contains_dma_pkt = true;
3757 if (is_mmu)
3758 parser->patched_cb_size += pkt_size;
3759 else
3760 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3761 (struct packet_lin_dma *) user_pkt);
3762 break;
3763
3764 case PACKET_WREG_32:
3765 case PACKET_WREG_BULK:
3766 case PACKET_MSG_LONG:
3767 case PACKET_MSG_SHORT:
3768 case PACKET_REPEAT:
3769 case PACKET_FENCE:
3770 case PACKET_NOP:
3771 case PACKET_ARB_POINT:
3772			parser->patched_cb_size += pkt_size;
3773 break;
3774
3775 default:
3776 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3777 pkt_id);
3778 rc = -EINVAL;
3779 break;
3780 }
3781
3782 if (rc)
3783 break;
3784 }
3785
3786 /*
3787 * The new CB should have space at the end for two MSG_PROT packets:
3788 * 1. A packet that will act as a completion packet
3789 * 2. A packet that will generate MSI-X interrupt
3790 */
3791 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3792
3793 return rc;
3794}
3795
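/*
 * Expand a single user LIN_DMA packet into one LIN_DMA packet per merged SG
 * entry of the pinned host buffer. All generated packets have WR_COMP
 * disabled except the last one, which keeps the user's original setting.
 */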
3796static int gaudi_patch_dma_packet(struct hl_device *hdev,
3797 struct hl_cs_parser *parser,
3798 struct packet_lin_dma *user_dma_pkt,
3799 struct packet_lin_dma *new_dma_pkt,
3800 u32 *new_dma_pkt_size)
3801{
3802 struct hl_userptr *userptr;
3803 struct scatterlist *sg, *sg_next_iter;
3804 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3805 u64 len, len_next;
3806 dma_addr_t dma_addr, dma_addr_next;
3807 u64 device_memory_addr, addr;
3808 enum dma_data_direction dir;
3809 struct sg_table *sgt;
3810 bool src_in_host = false;
3811 bool skip_host_mem_pin = false;
3812 bool user_memset;
3813
3814 ctl = le32_to_cpu(user_dma_pkt->ctl);
3815
3816 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3817 src_in_host = true;
3818
3819 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3820 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3821
3822 if (src_in_host) {
3823 addr = le64_to_cpu(user_dma_pkt->src_addr);
3824 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3825 dir = DMA_TO_DEVICE;
3826 if (user_memset)
3827 skip_host_mem_pin = true;
3828 } else {
3829 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3830 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3831 dir = DMA_FROM_DEVICE;
3832 }
3833
3834 if ((!skip_host_mem_pin) &&
3835 (!hl_userptr_is_pinned(hdev, addr,
3836 le32_to_cpu(user_dma_pkt->tsize),
3837 parser->job_userptr_list, &userptr))) {
3838 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3839			addr, le32_to_cpu(user_dma_pkt->tsize));
3840 return -EFAULT;
3841 }
3842
3843 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3844 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3845 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3846 return 0;
3847 }
3848
3849 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3850
3851 sgt = userptr->sgt;
3852 dma_desc_cnt = 0;
3853
3854 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3855 len = sg_dma_len(sg);
3856 dma_addr = sg_dma_address(sg);
3857
3858 if (len == 0)
3859 break;
3860
3861 while ((count + 1) < sgt->nents) {
3862 sg_next_iter = sg_next(sg);
3863 len_next = sg_dma_len(sg_next_iter);
3864 dma_addr_next = sg_dma_address(sg_next_iter);
3865
3866 if (len_next == 0)
3867 break;
3868
3869 if ((dma_addr + len == dma_addr_next) &&
3870 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3871 len += len_next;
3872 count++;
3873 sg = sg_next_iter;
3874 } else {
3875 break;
3876 }
3877 }
3878
3879 new_dma_pkt->ctl = user_dma_pkt->ctl;
3880
3881 ctl = le32_to_cpu(user_dma_pkt->ctl);
3882 if (likely(dma_desc_cnt))
3883 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3884 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3885 new_dma_pkt->ctl = cpu_to_le32(ctl);
3886 new_dma_pkt->tsize = cpu_to_le32(len);
3887
3888 if (dir == DMA_TO_DEVICE) {
3889 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3890 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3891 } else {
3892 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3893 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3894 }
3895
3896 if (!user_memset)
3897 device_memory_addr += len;
3898 dma_desc_cnt++;
3899 new_dma_pkt++;
3900 }
3901
3902 if (!dma_desc_cnt) {
3903 dev_err(hdev->dev,
3904			"Got 0 SG entries when patching DMA packet\n");
3905 return -EFAULT;
3906 }
3907
3908 /* Fix the last dma packet - wrcomp must be as user set it */
3909 new_dma_pkt--;
3910 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
3911
3912 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3913
3914 return 0;
3915}
3916
3917static int gaudi_patch_cb(struct hl_device *hdev,
3918 struct hl_cs_parser *parser)
3919{
3920 u32 cb_parsed_length = 0;
3921 u32 cb_patched_cur_length = 0;
3922 int rc = 0;
3923
3924	/* user_cb_size is greater than 0, so the loop always executes */
3925 while (cb_parsed_length < parser->user_cb_size) {
3926 enum packet_id pkt_id;
3927 u16 pkt_size;
3928 u32 new_pkt_size = 0;
3929 struct gaudi_packet *user_pkt, *kernel_pkt;
3930
3931 user_pkt = (struct gaudi_packet *) (uintptr_t)
3932 (parser->user_cb->kernel_address + cb_parsed_length);
3933 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
3934 (parser->patched_cb->kernel_address +
3935 cb_patched_cur_length);
3936
3937 pkt_id = (enum packet_id) (
3938 (le64_to_cpu(user_pkt->header) &
3939 PACKET_HEADER_PACKET_ID_MASK) >>
3940 PACKET_HEADER_PACKET_ID_SHIFT);
3941
3942 pkt_size = gaudi_packet_sizes[pkt_id];
3943 cb_parsed_length += pkt_size;
3944 if (cb_parsed_length > parser->user_cb_size) {
3945 dev_err(hdev->dev,
3946 "packet 0x%x is out of CB boundary\n", pkt_id);
3947 rc = -EINVAL;
3948 break;
3949 }
3950
3951 switch (pkt_id) {
3952 case PACKET_LIN_DMA:
3953 rc = gaudi_patch_dma_packet(hdev, parser,
3954 (struct packet_lin_dma *) user_pkt,
3955 (struct packet_lin_dma *) kernel_pkt,
3956 &new_pkt_size);
3957 cb_patched_cur_length += new_pkt_size;
3958 break;
3959
3960 case PACKET_MSG_PROT:
3961 dev_err(hdev->dev,
3962 "User not allowed to use MSG_PROT\n");
3963 rc = -EPERM;
3964 break;
3965
3966 case PACKET_CP_DMA:
3967 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3968 rc = -EPERM;
3969 break;
3970
3971 case PACKET_STOP:
3972 dev_err(hdev->dev, "User not allowed to use STOP\n");
3973 rc = -EPERM;
3974 break;
3975
3976 case PACKET_WREG_32:
3977 case PACKET_WREG_BULK:
3978 case PACKET_MSG_LONG:
3979 case PACKET_MSG_SHORT:
3980 case PACKET_REPEAT:
3981 case PACKET_FENCE:
3982 case PACKET_NOP:
3983 case PACKET_ARB_POINT:
3984 case PACKET_LOAD_AND_EXE:
3985 memcpy(kernel_pkt, user_pkt, pkt_size);
3986 cb_patched_cur_length += pkt_size;
3987 break;
3988
3989 default:
3990 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3991 pkt_id);
3992 rc = -EINVAL;
3993 break;
3994 }
3995
3996 if (rc)
3997 break;
3998 }
3999
4000 return rc;
4001}
4002
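/*
 * With the MMU enabled no address patching is needed, so the user CB is
 * copied as-is into a kernel-owned CB that is large enough to also hold the
 * two trailing MSG_PROT packets, and the copy is then re-validated.
 */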
4003static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4004 struct hl_cs_parser *parser)
4005{
4006 u64 patched_cb_handle;
4007 u32 patched_cb_size;
4008 struct hl_cb *user_cb;
4009 int rc;
4010
4011 /*
4012	 * The new CB should have space at the end for two MSG_PROT packets:
4013 * 1. A packet that will act as a completion packet
4014 * 2. A packet that will generate MSI interrupt
4015 */
4016 parser->patched_cb_size = parser->user_cb_size +
4017 sizeof(struct packet_msg_prot) * 2;
4018
4019 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4020 parser->patched_cb_size,
4021 &patched_cb_handle, HL_KERNEL_ASID_ID);
4022
4023 if (rc) {
4024 dev_err(hdev->dev,
4025 "Failed to allocate patched CB for DMA CS %d\n",
4026 rc);
4027 return rc;
4028 }
4029
4030 patched_cb_handle >>= PAGE_SHIFT;
4031 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4032 (u32) patched_cb_handle);
4033 /* hl_cb_get should never fail here so use kernel WARN */
4034 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4035 (u32) patched_cb_handle);
4036 if (!parser->patched_cb) {
4037 rc = -EFAULT;
4038 goto out;
4039 }
4040
4041 /*
4042 * The check that parser->user_cb_size <= parser->user_cb->size was done
4043 * in validate_queue_index().
4044 */
4045 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4046 (void *) (uintptr_t) parser->user_cb->kernel_address,
4047 parser->user_cb_size);
4048
4049 patched_cb_size = parser->patched_cb_size;
4050
4051 /* Validate patched CB instead of user CB */
4052 user_cb = parser->user_cb;
4053 parser->user_cb = parser->patched_cb;
4054 rc = gaudi_validate_cb(hdev, parser, true);
4055 parser->user_cb = user_cb;
4056
4057 if (rc) {
4058 hl_cb_put(parser->patched_cb);
4059 goto out;
4060 }
4061
4062 if (patched_cb_size != parser->patched_cb_size) {
4063 dev_err(hdev->dev, "user CB size mismatch\n");
4064 hl_cb_put(parser->patched_cb);
4065 rc = -EINVAL;
4066 goto out;
4067 }
4068
4069out:
4070	/*
4071	 * Always call cb destroy here because we still hold one reference
4072	 * to the CB from the earlier cb_get. After the job completes,
4073	 * cb_put will release it, but here we want to remove it from the
4074	 * idr.
4075	 */
4076 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4077 patched_cb_handle << PAGE_SHIFT);
4078
4079 return rc;
4080}
4081
4082static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4083 struct hl_cs_parser *parser)
4084{
4085 u64 patched_cb_handle;
4086 int rc;
4087
4088 rc = gaudi_validate_cb(hdev, parser, false);
4089
4090 if (rc)
4091 goto free_userptr;
4092
4093 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4094 parser->patched_cb_size,
4095 &patched_cb_handle, HL_KERNEL_ASID_ID);
4096 if (rc) {
4097 dev_err(hdev->dev,
4098 "Failed to allocate patched CB for DMA CS %d\n", rc);
4099 goto free_userptr;
4100 }
4101
4102 patched_cb_handle >>= PAGE_SHIFT;
4103 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4104 (u32) patched_cb_handle);
4105 /* hl_cb_get should never fail here so use kernel WARN */
4106 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4107 (u32) patched_cb_handle);
4108 if (!parser->patched_cb) {
4109 rc = -EFAULT;
4110 goto out;
4111 }
4112
4113 rc = gaudi_patch_cb(hdev, parser);
4114
4115 if (rc)
4116 hl_cb_put(parser->patched_cb);
4117
4118out:
4119	/*
4120	 * Always call cb destroy here because we still hold one reference
4121	 * to the CB from the earlier cb_get. After the job completes,
4122	 * cb_put will release it, but here we want to remove it from the
4123	 * idr.
4124	 */
4125 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4126 patched_cb_handle << PAGE_SHIFT);
4127
4128free_userptr:
4129 if (rc)
4130 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4131 return rc;
4132}
4133
4134static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4135 struct hl_cs_parser *parser)
4136{
4137 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4138
4139 /* For internal queue jobs just check if CB address is valid */
4140 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4141 parser->user_cb_size,
4142 asic_prop->sram_user_base_address,
4143 asic_prop->sram_end_address))
4144 return 0;
4145
4146 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4147 parser->user_cb_size,
4148 asic_prop->dram_user_base_address,
4149 asic_prop->dram_end_address))
4150 return 0;
4151
4152 /* PMMU and HPMMU addresses are equal, check only one of them */
4153 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4154 parser->user_cb_size,
4155 asic_prop->pmmu.start_addr,
4156 asic_prop->pmmu.end_addr))
4157 return 0;
4158
4159 dev_err(hdev->dev,
4160 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4161 parser->user_cb, parser->user_cb_size);
4162
4163 return -EFAULT;
4164}
4165
4166static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4167{
4168 struct gaudi_device *gaudi = hdev->asic_specific;
4169
4170 if (parser->queue_type == QUEUE_TYPE_INT)
4171 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4172
4173 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4174 return gaudi_parse_cb_mmu(hdev, parser);
4175 else
4176 return gaudi_parse_cb_no_mmu(hdev, parser);
4177}
4178
4179static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4180 u64 kernel_address, u32 len,
4181 u64 cq_addr, u32 cq_val, u32 msi_vec,
4182 bool eb)
4183{
4184 struct gaudi_device *gaudi = hdev->asic_specific;
4185 struct packet_msg_prot *cq_pkt;
4186 u32 tmp;
4187
4188 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4189 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4190
4191 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4192 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4193
4194 if (eb)
4195 tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT);
4196
4197 cq_pkt->ctl = cpu_to_le32(tmp);
4198 cq_pkt->value = cpu_to_le32(cq_val);
4199 cq_pkt->addr = cpu_to_le64(cq_addr);
4200
4201 cq_pkt++;
4202
4203 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4204 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4205 cq_pkt->ctl = cpu_to_le32(tmp);
4206 cq_pkt->value = cpu_to_le32(1);
4207
4208 if (!gaudi->multi_msi_mode)
4209 msi_vec = 0;
4210
4211 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4212}
4213
4214static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4215{
4216 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4217}
4218
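/*
 * Fill a device memory range with a 64-bit value by building a single
 * memset LIN_DMA packet in a kernel CB and submitting it as a driver job
 * on QMAN0 of the PCI DMA engine.
 */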
4219static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4220 u32 size, u64 val)
4221{
4222 struct packet_lin_dma *lin_dma_pkt;
4223 struct hl_cs_job *job;
4224 u32 cb_size, ctl;
4225 struct hl_cb *cb;
4226 int rc;
4227
4228 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
4229 if (!cb)
4230 return -EFAULT;
4231
4232 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4233 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4234 cb_size = sizeof(*lin_dma_pkt);
4235
4236 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4237 (1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4238 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
4239 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
4240 (1 << GAUDI_PKT_CTL_MB_SHIFT));
4241 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4242 lin_dma_pkt->src_addr = cpu_to_le64(val);
4243 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4244 lin_dma_pkt->tsize = cpu_to_le32(size);
4245
4246 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4247 if (!job) {
4248 dev_err(hdev->dev, "Failed to allocate a new job\n");
4249 rc = -ENOMEM;
4250 goto release_cb;
4251 }
4252
4253 job->id = 0;
4254 job->user_cb = cb;
4255 job->user_cb->cs_cnt++;
4256 job->user_cb_size = cb_size;
4257 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4258 job->patched_cb = job->user_cb;
4259 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4260
4261 hl_debugfs_add_job(hdev, job);
4262
4263 rc = gaudi_send_job_on_qman0(hdev, job);
4264
4265 hl_debugfs_remove_job(hdev, job);
4266 kfree(job);
4267 cb->cs_cnt--;
4268
4269release_cb:
4270 hl_cb_put(cb);
4271 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4272
4273 return rc;
4274}
4275
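/*
 * Clear the sync manager SOB and monitor status registers that user jobs
 * may have modified. In the W_S block only the objects from the first
 * user-available index onward are cleared, as the lower ones are reserved
 * for the driver.
 */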
4276static void gaudi_restore_sm_registers(struct hl_device *hdev)
4277{
4278 int i;
4279
4280 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4281 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4282 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4283 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4284 }
4285
4286 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4287 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4288 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4289 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4290 }
4291
4292 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4293
4294 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4295 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4296
4297 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4298
4299 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4300 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4301}
4302
4303static void gaudi_restore_dma_registers(struct hl_device *hdev)
4304{
4305 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4306 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4307 int i;
4308
4309 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4310 u64 sob_addr = CFG_BASE +
4311 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4312 (i * sob_delta);
4313 u32 dma_offset = i * DMA_CORE_OFFSET;
4314
4315 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4316 lower_32_bits(sob_addr));
4317 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4318 upper_32_bits(sob_addr));
4319 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4320
4321 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4322 * modified by the user for SRAM reduction
4323 */
4324 if (i > 1)
4325 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4326 0x00000001);
4327 }
4328}
4329
4330static void gaudi_restore_qm_registers(struct hl_device *hdev)
4331{
4332 u32 qman_offset;
4333 int i;
4334
4335 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4336 qman_offset = i * DMA_QMAN_OFFSET;
4337 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4338 }
4339
4340 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4341 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4342 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4343 }
4344
4345 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4346 qman_offset = i * TPC_QMAN_OFFSET;
4347 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4348 }
4349}
4350
4351static void gaudi_restore_user_registers(struct hl_device *hdev)
4352{
4353 gaudi_restore_sm_registers(hdev);
4354 gaudi_restore_dma_registers(hdev);
4355 gaudi_restore_qm_registers(hdev);
4356}
4357
4358static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4359{
4360 struct asic_fixed_properties *prop = &hdev->asic_prop;
4361 u64 addr = prop->sram_user_base_address;
4362 u32 size = hdev->pldm ? 0x10000 :
4363 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4364 u64 val = 0x7777777777777777ull;
4365 int rc;
4366
4367 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4368 if (rc) {
4369 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4370 return rc;
4371 }
4372
4373 gaudi_mmu_prepare(hdev, asid);
4374
4375 gaudi_restore_user_registers(hdev);
4376
4377 return 0;
4378}
4379
4380static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4381{
4382 struct asic_fixed_properties *prop = &hdev->asic_prop;
4383 struct gaudi_device *gaudi = hdev->asic_specific;
4384 u64 addr = prop->mmu_pgt_addr;
4385 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4386
4387 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4388 return 0;
4389
4390 return gaudi_memset_device_memory(hdev, addr, size, 0);
4391}
4392
4393static void gaudi_restore_phase_topology(struct hl_device *hdev)
4394{
4395
4396}
4397
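/*
 * The debugfs accessors below dispatch on the target address range: CFG
 * space goes through RREG32/WREG32 (refused while clock gating is enabled),
 * SRAM is accessed through its PCI BAR, HBM through the sliding HBM BAR,
 * and host physical memory via phys_to_virt() when no IOMMU is present.
 */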
4398static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4399{
4400 struct asic_fixed_properties *prop = &hdev->asic_prop;
4401 struct gaudi_device *gaudi = hdev->asic_specific;
4402 u64 hbm_bar_addr;
4403 int rc = 0;
4404
4405 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4406 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4407 dev_err_ratelimited(hdev->dev,
4408 "Can't read register - clock gating is enabled!\n");
4409 rc = -EFAULT;
4410 } else {
4411 *val = RREG32(addr - CFG_BASE);
4412 }
4413 } else if ((addr >= SRAM_BASE_ADDR) &&
4414 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4415 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4416 (addr - SRAM_BASE_ADDR));
4417 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4418 u64 bar_base_addr = DRAM_PHYS_BASE +
4419 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4420
4421 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4422 if (hbm_bar_addr != U64_MAX) {
4423 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4424 (addr - bar_base_addr));
4425
4426 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4427 hbm_bar_addr);
4428 }
4429 if (hbm_bar_addr == U64_MAX)
4430 rc = -EIO;
4431 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4432 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4433 } else {
4434 rc = -EFAULT;
4435 }
4436
4437 return rc;
4438}
4439
4440static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4441{
4442 struct asic_fixed_properties *prop = &hdev->asic_prop;
4443 struct gaudi_device *gaudi = hdev->asic_specific;
4444 u64 hbm_bar_addr;
4445 int rc = 0;
4446
4447 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4448 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4449 dev_err_ratelimited(hdev->dev,
4450 "Can't write register - clock gating is enabled!\n");
4451 rc = -EFAULT;
4452 } else {
4453 WREG32(addr - CFG_BASE, val);
4454 }
4455 } else if ((addr >= SRAM_BASE_ADDR) &&
4456 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4457 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4458 (addr - SRAM_BASE_ADDR));
4459 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4460 u64 bar_base_addr = DRAM_PHYS_BASE +
4461 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4462
4463 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4464 if (hbm_bar_addr != U64_MAX) {
4465 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4466 (addr - bar_base_addr));
4467
4468 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4469 hbm_bar_addr);
4470 }
4471 if (hbm_bar_addr == U64_MAX)
4472 rc = -EIO;
4473 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4474 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4475 } else {
4476 rc = -EFAULT;
4477 }
4478
4479 return rc;
4480}
4481
4482static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4483{
4484 struct asic_fixed_properties *prop = &hdev->asic_prop;
4485 struct gaudi_device *gaudi = hdev->asic_specific;
4486 u64 hbm_bar_addr;
4487 int rc = 0;
4488
4489 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4490 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4491 dev_err_ratelimited(hdev->dev,
4492 "Can't read register - clock gating is enabled!\n");
4493 rc = -EFAULT;
4494 } else {
4495 u32 val_l = RREG32(addr - CFG_BASE);
4496 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4497
4498 *val = (((u64) val_h) << 32) | val_l;
4499 }
4500 } else if ((addr >= SRAM_BASE_ADDR) &&
4501 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4502 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4503 (addr - SRAM_BASE_ADDR));
4504 } else if (addr <=
4505 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4506 u64 bar_base_addr = DRAM_PHYS_BASE +
4507 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4508
4509 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4510 if (hbm_bar_addr != U64_MAX) {
4511 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4512 (addr - bar_base_addr));
4513
4514 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4515 hbm_bar_addr);
4516 }
4517 if (hbm_bar_addr == U64_MAX)
4518 rc = -EIO;
4519 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4520 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4521 } else {
4522 rc = -EFAULT;
4523 }
4524
4525 return rc;
4526}
4527
4528static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4529{
4530 struct asic_fixed_properties *prop = &hdev->asic_prop;
4531 struct gaudi_device *gaudi = hdev->asic_specific;
4532 u64 hbm_bar_addr;
4533 int rc = 0;
4534
4535 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4536 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4537 dev_err_ratelimited(hdev->dev,
4538 "Can't write register - clock gating is enabled!\n");
4539 rc = -EFAULT;
4540 } else {
4541 WREG32(addr - CFG_BASE, lower_32_bits(val));
4542 WREG32(addr + sizeof(u32) - CFG_BASE,
4543 upper_32_bits(val));
4544 }
4545 } else if ((addr >= SRAM_BASE_ADDR) &&
4546 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4547 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4548 (addr - SRAM_BASE_ADDR));
4549 } else if (addr <=
4550 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4551 u64 bar_base_addr = DRAM_PHYS_BASE +
4552 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4553
4554 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4555 if (hbm_bar_addr != U64_MAX) {
4556 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4557 (addr - bar_base_addr));
4558
4559 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4560 hbm_bar_addr);
4561 }
4562 if (hbm_bar_addr == U64_MAX)
4563 rc = -EIO;
4564 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4565 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4566 } else {
4567 rc = -EFAULT;
4568 }
4569
4570 return rc;
4571}
4572
4573static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4574{
4575 struct gaudi_device *gaudi = hdev->asic_specific;
4576
4577 if (hdev->hard_reset_pending)
4578 return U64_MAX;
4579
4580 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4581 (addr - gaudi->hbm_bar_cur_addr));
4582}
4583
4584static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4585{
4586 struct gaudi_device *gaudi = hdev->asic_specific;
4587
4588 if (hdev->hard_reset_pending)
4589 return;
4590
4591 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4592 (addr - gaudi->hbm_bar_cur_addr));
4593}
4594
4595static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4596{
4597 /* mask to zero the MMBP and ASID bits */
4598 WREG32_AND(reg, ~0x7FF);
4599 WREG32_OR(reg, asid);
4600}
4601
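/*
 * Program the context's ASID (with the MMBP bit cleared) into the
 * non-secure property and AxUSER registers of all DMA, TPC and MME engines,
 * as well as the PSOC trace unit, so their transactions are translated by
 * the MMU in that context's address space. Clock gating is disabled around
 * the register writes.
 */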
4602static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4603{
4604 struct gaudi_device *gaudi = hdev->asic_specific;
4605
4606 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4607 return;
4608
4609 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4610 WARN(1, "asid %u is too big\n", asid);
4611 return;
4612 }
4613
4614 mutex_lock(&gaudi->clk_gate_mutex);
4615
4616 hdev->asic_funcs->disable_clock_gating(hdev);
4617
4618 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4619 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4620 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4621 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4622 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4623
4624 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4625 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4626 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4627 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4628 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4629
4630 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4631 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4632 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4633 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4634 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4635
4636 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4637 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4638 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4639 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4640 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4641
4642 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4643 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4644 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4645 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4646 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4647
4648 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4649 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4650 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4651 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4652 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4653
4654 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4655 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4656 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4657 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4658 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4659
4660 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4661 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4662 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4663 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4664 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4665
4666 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4667 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4668 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4669 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4670 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4671 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4672 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4673 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4674
4675 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4676 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4677 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4678 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4679 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4680 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4681 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4682
4683 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4684 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4685 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4686 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4687 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4688 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4689 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4690
4691 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4692 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4693 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4694 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4695 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4696 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4697 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4698
4699 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4700 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4701 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4702 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4703 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4704 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4705 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4706
4707 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4708 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4709 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4710 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4711 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4712 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4713 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4714
4715 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4716 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4717 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4718 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4719 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4720 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4721 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4722
4723 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4724 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4725 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4726 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4727 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4728 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4729 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4730
4731 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4732 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4733 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4734 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4735 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4736 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4737 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4738
4739 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4740 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4741 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4742 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4743 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4744 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4745 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4746 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4747 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4748 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4749
4750 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4751 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4752 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4753 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4754 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4755 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4756 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4757 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4758 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4759 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4760 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4761 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4762
4763 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4764 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4765
4766 hdev->asic_funcs->enable_clock_gating(hdev);
4767
4768 mutex_unlock(&gaudi->clk_gate_mutex);
4769}
4770
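/*
 * Submit a driver-generated CB on the PCI DMA QMAN0 queue. The device must
 * be idle; the DMA core protection bit is set for the duration of the job,
 * the CB (whose last packet is a MSG_PROT fence) is sent without a
 * completion entry, and host memory is polled for the fence value.
 */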
4771static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4772 struct hl_cs_job *job)
4773{
4774 struct packet_msg_prot *fence_pkt;
4775 u32 *fence_ptr;
4776 dma_addr_t fence_dma_addr;
4777 struct hl_cb *cb;
4778 u32 tmp, timeout, dma_offset;
4779 int rc;
4780
4781 if (hdev->pldm)
4782 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4783 else
4784 timeout = HL_DEVICE_TIMEOUT_USEC;
4785
4786 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4787 dev_err_ratelimited(hdev->dev,
4788 "Can't send driver job on QMAN0 because the device is not idle\n");
4789 return -EBUSY;
4790 }
4791
4792 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4793 &fence_dma_addr);
4794 if (!fence_ptr) {
4795 dev_err(hdev->dev,
4796 "Failed to allocate fence memory for QMAN0\n");
4797 return -ENOMEM;
4798 }
4799
4800 cb = job->patched_cb;
4801
4802 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4803 job->job_cb_size - sizeof(struct packet_msg_prot));
4804
4805 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4806 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
4807 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4808 fence_pkt->ctl = cpu_to_le32(tmp);
4809 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4810 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4811
4812 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4813
4814 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4815
4816 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4817 job->job_cb_size, cb->bus_address);
4818 if (rc) {
4819 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4820 goto free_fence_ptr;
4821 }
4822
4823 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4824 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4825 timeout, true);
4826
4827 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4828
4829 if (rc == -ETIMEDOUT) {
4830 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4831 goto free_fence_ptr;
4832 }
4833
4834free_fence_ptr:
4835 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4836 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4837
4838 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4839 fence_dma_addr);
4840 return rc;
4841}
4842
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004843static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4844{
Ofir Bittonebd8d122020-05-10 13:41:28 +03004845 if (event_type >= GAUDI_EVENT_SIZE)
4846 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004847
Ofir Bittonebd8d122020-05-10 13:41:28 +03004848 if (!gaudi_irq_map_table[event_type].valid)
4849 goto event_not_supported;
4850
4851	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
4852
4853 return;
4854
4855event_not_supported:
4856 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004857}
4858
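/*
 * Each DMA_IF router location is shared by two DMA cores, so the per-core
 * ERR_CAUSE registers are read to figure out which of the two actually
 * triggered the RAZWI; if both (or neither) flag the error, both candidates
 * are reported.
 */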
4859static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
4860 u32 x_y, bool is_write)
4861{
4862 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
4863
4864 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
4865 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
4866
4867 switch (x_y) {
4868 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
4869 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
4870 dma_id[0] = 0;
4871 dma_id[1] = 2;
4872 break;
4873 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
4874 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
4875 dma_id[0] = 1;
4876 dma_id[1] = 3;
4877 break;
4878 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
4879 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
4880 dma_id[0] = 4;
4881 dma_id[1] = 6;
4882 break;
4883 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
4884 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
4885 dma_id[0] = 5;
4886 dma_id[1] = 7;
4887 break;
4888 default:
4889 goto unknown_initiator;
4890 }
4891
4892 for (i = 0 ; i < 2 ; i++) {
4893 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
4894 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
4895 }
4896
4897 switch (x_y) {
4898 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
4899 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
4900 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4901 return "DMA0";
4902 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4903 return "DMA2";
4904 else
4905 return "DMA0 or DMA2";
4906 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
4907 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
4908 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4909 return "DMA1";
4910 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4911 return "DMA3";
4912 else
4913 return "DMA1 or DMA3";
4914 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
4915 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
4916 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4917 return "DMA4";
4918 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4919 return "DMA6";
4920 else
4921 return "DMA4 or DMA6";
4922 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
4923 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
4924 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4925 return "DMA5";
4926 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4927 return "DMA7";
4928 else
4929 return "DMA5 or DMA7";
4930 }
4931
4932unknown_initiator:
4933 return "unknown initiator";
4934}
4935
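/*
 * Decode the RAZWI initiator from the router X/Y coordinates and AXI ID
 * captured by the MMU in the RAZWI_WRITE_ID/RAZWI_READ_ID registers, and
 * return a printable engine name.
 */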
4936static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
4937 bool is_write)
4938{
4939 u32 val, x_y, axi_id;
4940
4941 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
4942 RREG32(mmMMU_UP_RAZWI_READ_ID);
4943 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
4944 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
4945 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
4946 RAZWI_INITIATOR_AXI_ID_SHIFT);
4947
4948 switch (x_y) {
4949 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
4950 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
4951 return "TPC0";
4952 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
4953 return "NIC0";
4954 break;
4955 case RAZWI_INITIATOR_ID_X_Y_TPC1:
4956 return "TPC1";
4957 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
4958 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
4959 return "MME0";
4960 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
4961 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
4962 return "MME1";
4963 case RAZWI_INITIATOR_ID_X_Y_TPC2:
4964 return "TPC2";
4965 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
4966 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
4967 return "TPC3";
4968 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
4969 return "PCI";
4970 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
4971 return "CPU";
4972 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
4973 return "PSOC";
4974 break;
4975 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
4976 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
4977 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
4978 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
4979 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
4980 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
4981 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
4982 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
4983 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
4984 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
4985 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
4986 return "TPC4";
4987 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
4988 return "NIC1";
4989 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
4990 return "NIC2";
4991 break;
4992 case RAZWI_INITIATOR_ID_X_Y_TPC5:
4993 return "TPC5";
4994 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
4995 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
4996 return "MME2";
4997 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
4998 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
4999 return "MME3";
5000 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5001 return "TPC6";
5002 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5003 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5004 return "TPC7";
5005 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5006 return "NIC4";
5007 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5008 return "NIC5";
5009 break;
5010 default:
5011 break;
5012 }
5013
5014 dev_err(hdev->dev,
5015 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5016 val,
5017 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5018 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5019 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5020 RAZWI_INITIATOR_AXI_ID_MASK);
5021
5022 return "unknown initiator";
5023}
5024
5025static void gaudi_print_razwi_info(struct hl_device *hdev)
5026{
5027 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5028 dev_err_ratelimited(hdev->dev,
5029 "RAZWI event caused by illegal write of %s\n",
5030 gaudi_get_razwi_initiator_name(hdev, true));
5031 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5032 }
5033
5034 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5035 dev_err_ratelimited(hdev->dev,
5036 "RAZWI event caused by illegal read of %s\n",
5037 gaudi_get_razwi_initiator_name(hdev, false));
5038 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5039 }
5040}
5041
5042static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5043{
5044 struct gaudi_device *gaudi = hdev->asic_specific;
5045 u64 addr;
5046 u32 val;
5047
5048 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5049 return;
5050
5051 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5052 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5053 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5054 addr <<= 32;
5055 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5056
5057 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5058 addr);
5059
5060 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5061 }
5062
5063 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5064 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5065 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5066 addr <<= 32;
5067 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5068
5069 dev_err_ratelimited(hdev->dev,
5070 "MMU access error on va 0x%llx\n", addr);
5071
5072 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5073 }
5074}
5075
5076/*
5077 * +-------------------+------------------------------------------------------+
5078 * | Configuration Reg | Description |
5079 * | Address | |
5080 * +-------------------+------------------------------------------------------+
5081 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5082 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5083 * | |0xF34 memory wrappers 63:32 |
5084 * | |0xF38 memory wrappers 95:64 |
5085 * | |0xF3C memory wrappers 127:96 |
5086 * +-------------------+------------------------------------------------------+
5087 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5088 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5089 * | |0xF44 memory wrappers 63:32 |
5090 * | |0xF48 memory wrappers 95:64 |
5091 * | |0xF4C memory wrappers 127:96 |
5092 * +-------------------+------------------------------------------------------+
5093 */
5094static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
5095 const char *block_name,
5096 u64 block_address, int num_memories,
5097 bool derr, bool disable_clock_gating)
5098{
5099 struct gaudi_device *gaudi = hdev->asic_specific;
5100 int num_mem_regs = num_memories / 32 + ((num_memories % 32) ? 1 : 0);
5101
5102 if (block_address >= CFG_BASE)
5103 block_address -= CFG_BASE;
5104
5105 if (derr)
5106 block_address += GAUDI_ECC_DERR0_OFFSET;
5107 else
5108 block_address += GAUDI_ECC_SERR0_OFFSET;
5109
5110 if (disable_clock_gating) {
5111 mutex_lock(&gaudi->clk_gate_mutex);
5112 hdev->asic_funcs->disable_clock_gating(hdev);
5113 }
5114
5115 switch (num_mem_regs) {
5116 case 1:
5117 dev_err(hdev->dev,
5118 "%s ECC indication: 0x%08x\n",
5119 block_name, RREG32(block_address));
5120 break;
5121 case 2:
5122 dev_err(hdev->dev,
5123 "%s ECC indication: 0x%08x 0x%08x\n",
5124 block_name,
5125 RREG32(block_address), RREG32(block_address + 4));
5126 break;
5127 case 3:
5128 dev_err(hdev->dev,
5129 "%s ECC indication: 0x%08x 0x%08x 0x%08x\n",
5130 block_name,
5131 RREG32(block_address), RREG32(block_address + 4),
5132 RREG32(block_address + 8));
5133 break;
5134 case 4:
5135 dev_err(hdev->dev,
5136 "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n",
5137 block_name,
5138 RREG32(block_address), RREG32(block_address + 4),
5139 RREG32(block_address + 8), RREG32(block_address + 0xc));
5140 break;
5141 default:
5142 break;
5143
5144 }
5145
5146 if (disable_clock_gating) {
5147 hdev->asic_funcs->enable_clock_gating(hdev);
5148 mutex_unlock(&gaudi->clk_gate_mutex);
5149 }
5150}
5151
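/*
 * Generic QMAN error handler: walk the GLBL_STS1 register of every stream
 * plus the lower CP, log each error-cause bit that is set and clear it
 * (write-1-to-clear), then decode the ARB_ERR cause register.
 */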
5152static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5153 const char *qm_name,
5154 u64 glbl_sts_addr,
5155 u64 arb_err_addr)
5156{
5157 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5158 char reg_desc[32];
5159
5160 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5161 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5162 glbl_sts_clr_val = 0;
5163 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5164
5165 if (!glbl_sts_val)
5166 continue;
5167
5168 if (i == QMAN_STREAMS)
5169 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5170 else
5171 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5172
5173 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5174 if (glbl_sts_val & BIT(j)) {
5175 dev_err_ratelimited(hdev->dev,
5176 "%s %s. err cause: %s\n",
5177 qm_name, reg_desc,
5178 gaudi_qman_error_cause[j]);
5179 glbl_sts_clr_val |= BIT(j);
5180 }
5181 }
5182
5183 /* Write 1 to clear errors */
5184 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5185 }
5186
5187 arb_err_val = RREG32(arb_err_addr);
5188
5189 if (!arb_err_val)
5190 return;
5191
5192 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5193 if (arb_err_val & BIT(j)) {
5194 dev_err_ratelimited(hdev->dev,
5195 "%s ARB_ERR. err cause: %s\n",
5196 qm_name,
5197 gaudi_qman_arb_error_cause[j]);
5198 }
5199 }
5200}
5201
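/*
 * Translate an ECC event into the parameters of the generic ECC printer:
 * block name, configuration base address, number of memory wrappers,
 * single vs. double error, and whether clock gating must be disabled
 * before the indication registers can be read.
 */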
5202static void gaudi_print_ecc_info(struct hl_device *hdev, u16 event_type)
5203{
5204 u64 block_address;
5205 u8 index;
5206 int num_memories;
5207 char desc[32];
5208 bool derr;
5209 bool disable_clock_gating;
5210
5211 switch (event_type) {
5212 case GAUDI_EVENT_PCIE_CORE_SERR:
5213 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5214 block_address = mmPCIE_CORE_BASE;
5215 num_memories = 51;
5216 derr = false;
5217 disable_clock_gating = false;
5218 break;
5219 case GAUDI_EVENT_PCIE_CORE_DERR:
5220 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5221 block_address = mmPCIE_CORE_BASE;
5222 num_memories = 51;
5223 derr = true;
5224 disable_clock_gating = false;
5225 break;
5226 case GAUDI_EVENT_PCIE_IF_SERR:
5227 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5228 block_address = mmPCIE_WRAP_BASE;
5229 num_memories = 11;
5230 derr = false;
5231 disable_clock_gating = false;
5232 break;
5233 case GAUDI_EVENT_PCIE_IF_DERR:
5234 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5235 block_address = mmPCIE_WRAP_BASE;
5236 num_memories = 11;
5237 derr = true;
5238 disable_clock_gating = false;
5239 break;
5240 case GAUDI_EVENT_PCIE_PHY_SERR:
5241 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5242 block_address = mmPCIE_PHY_BASE;
5243 num_memories = 4;
5244 derr = false;
5245 disable_clock_gating = false;
5246 break;
5247 case GAUDI_EVENT_PCIE_PHY_DERR:
5248 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5249 block_address = mmPCIE_PHY_BASE;
5250 num_memories = 4;
5251 derr = true;
5252 disable_clock_gating = false;
5253 break;
5254 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5255 index = event_type - GAUDI_EVENT_TPC0_SERR;
5256 block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5257 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5258 num_memories = 90;
5259 derr = false;
5260 disable_clock_gating = true;
5261 break;
5262 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5263 index = event_type - GAUDI_EVENT_TPC0_DERR;
5264 block_address =
5265 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5266 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5267 num_memories = 90;
5268 derr = true;
5269 disable_clock_gating = true;
5270 break;
5271 case GAUDI_EVENT_MME0_ACC_SERR:
5272 case GAUDI_EVENT_MME1_ACC_SERR:
5273 case GAUDI_EVENT_MME2_ACC_SERR:
5274 case GAUDI_EVENT_MME3_ACC_SERR:
5275 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5276 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5277 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5278 num_memories = 128;
5279 derr = false;
5280 disable_clock_gating = true;
5281 break;
5282 case GAUDI_EVENT_MME0_ACC_DERR:
5283 case GAUDI_EVENT_MME1_ACC_DERR:
5284 case GAUDI_EVENT_MME2_ACC_DERR:
5285 case GAUDI_EVENT_MME3_ACC_DERR:
5286 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5287 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5288 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5289 num_memories = 128;
5290 derr = true;
5291 disable_clock_gating = true;
5292 break;
5293 case GAUDI_EVENT_MME0_SBAB_SERR:
5294 case GAUDI_EVENT_MME1_SBAB_SERR:
5295 case GAUDI_EVENT_MME2_SBAB_SERR:
5296 case GAUDI_EVENT_MME3_SBAB_SERR:
5297 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5298 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5299 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5300 num_memories = 33;
5301 derr = false;
5302 disable_clock_gating = true;
5303 break;
5304 case GAUDI_EVENT_MME0_SBAB_DERR:
5305 case GAUDI_EVENT_MME1_SBAB_DERR:
5306 case GAUDI_EVENT_MME2_SBAB_DERR:
5307 case GAUDI_EVENT_MME3_SBAB_DERR:
5308 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5309 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5310 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5311 num_memories = 33;
5312 derr = true;
5313 disable_clock_gating = true;
5314 break;
5315 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5316 index = event_type - GAUDI_EVENT_DMA0_SERR_ECC;
5317 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5318 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5319 num_memories = 16;
5320 derr = false;
5321 disable_clock_gating = false;
5322 break;
5323 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5324 index = event_type - GAUDI_EVENT_DMA0_DERR_ECC;
5325 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5326 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5327 num_memories = 16;
5328 derr = true;
5329 disable_clock_gating = false;
5330 break;
5331 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5332 block_address = mmCPU_IF_BASE;
5333 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5334 num_memories = 4;
5335 derr = false;
5336 disable_clock_gating = false;
5337 break;
5338 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5339 block_address = mmCPU_IF_BASE;
5340 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5341 num_memories = 4;
5342 derr = true;
5343 disable_clock_gating = false;
5344 break;
5345 case GAUDI_EVENT_PSOC_MEM_SERR:
5346 block_address = mmPSOC_GLOBAL_CONF_BASE;
5347 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5348 num_memories = 4;
5349 derr = false;
5350 disable_clock_gating = false;
5351 break;
5352 case GAUDI_EVENT_PSOC_MEM_DERR:
5353 block_address = mmPSOC_GLOBAL_CONF_BASE;
5354 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5355 num_memories = 4;
5356 derr = true;
5357 disable_clock_gating = false;
5358 break;
5359 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5360 block_address = mmPSOC_CS_TRACE_BASE;
5361 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5362 num_memories = 2;
5363 derr = false;
5364 disable_clock_gating = false;
5365 break;
5366 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5367 block_address = mmPSOC_CS_TRACE_BASE;
5368 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5369 num_memories = 2;
5370 derr = true;
5371 disable_clock_gating = false;
5372 break;
5373 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5374 index = event_type - GAUDI_EVENT_SRAM0_SERR;
5375 block_address =
5376 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5377 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5378 num_memories = 2;
5379 derr = false;
5380 disable_clock_gating = false;
5381 break;
5382 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5383 index = event_type - GAUDI_EVENT_SRAM0_DERR;
5384 block_address =
5385 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5386 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5387 num_memories = 2;
5388 derr = true;
5389 disable_clock_gating = false;
5390 break;
5391 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5392 index = event_type - GAUDI_EVENT_DMA_IF0_SERR;
5393 block_address = mmDMA_IF_W_S_BASE +
5394 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5395 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5396 num_memories = 60;
5397 derr = false;
5398 disable_clock_gating = false;
5399 break;
5400 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5401 index = event_type - GAUDI_EVENT_DMA_IF0_DERR;
5402 block_address = mmDMA_IF_W_S_BASE +
5403 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5404 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5405 derr = true;
5406 num_memories = 60;
5407 disable_clock_gating = false;
5408 break;
5409 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5410 index = event_type - GAUDI_EVENT_HBM_0_SERR;
5411 /* HBM Registers are at different offsets */
5412 block_address = mmHBM0_BASE + 0x8000 +
5413 index * (mmHBM1_BASE - mmHBM0_BASE);
5414 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5415 derr = false;
5416 num_memories = 64;
5417 disable_clock_gating = false;
5418 break;
5419 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5420 index = event_type - GAUDI_EVENT_HBM_0_DERR;
5421 /* HBM Registers are at different offsets */
5422 block_address = mmHBM0_BASE + 0x8000 +
5423 index * (mmHBM1_BASE - mmHBM0_BASE);
5424 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5425 derr = true;
5426 num_memories = 64;
5427 disable_clock_gating = false;
5428 break;
5429 default:
5430 return;
5431 }
5432
5433 gaudi_print_ecc_info_generic(hdev, desc, block_address, num_memories,
5434 derr, disable_clock_gating);
5435}
5436
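/*
 * Resolve the GLBL_STS1_0 and ARB_ERR_CAUSE addresses of the QMAN that
 * raised the event (TPC/MME/DMA, selected by the event index) and pass
 * them to the generic QMAN error handler.
 */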
5437static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5438{
5439 u64 glbl_sts_addr, arb_err_addr;
5440 u8 index;
5441 char desc[32];
5442
5443 switch (event_type) {
5444 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5445 index = event_type - GAUDI_EVENT_TPC0_QM;
5446 glbl_sts_addr =
5447 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5448 arb_err_addr =
5449 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5450 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5451 break;
5452 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5453 index = event_type - GAUDI_EVENT_MME0_QM;
5454 glbl_sts_addr =
5455 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5456 arb_err_addr =
5457 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5458 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5459 break;
5460 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5461 index = event_type - GAUDI_EVENT_DMA0_QM;
5462 glbl_sts_addr =
5463 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5464 arb_err_addr =
5465 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5466 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5467 break;
5468 default:
5469 return;
5470 }
5471
5472 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5473}
5474
5475static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5476 bool razwi)
5477{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005478 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005479
5480 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5481 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5482 event_type, desc);
5483
5484 gaudi_print_ecc_info(hdev, event_type);
5485
5486 if (razwi) {
5487 gaudi_print_razwi_info(hdev);
5488 gaudi_print_mmu_error_info(hdev);
5489 }
5490}
5491
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005492static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5493{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005494 struct gaudi_device *gaudi = hdev->asic_specific;
5495
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005496 /* Unmask all IRQs since some could have been received
5497 * during the soft reset
5498 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03005499 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005500}
5501
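/*
 * Read and clear the interrupt/ECC status of a single HBM device. Each
 * channel exposes two pseudo-channels: interrupt bits at offsets
 * 0x06C/0x07C, ECC counters at 0x060/0x070 and first-error addresses at
 * 0x064/0x074. The MC SRAM SERR/DERR indications are read from
 * 0x8F30/0x8F34 and 0x8F40/0x8F44. Returns non-zero if any error was
 * reported.
 */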
5502static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5503{
5504 int ch, err = 0;
5505 u32 base, val, val2;
5506
5507 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5508 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5509 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5510 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5511 if (val) {
5512 err = 1;
5513 dev_err(hdev->dev,
5514 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5515 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5516 (val >> 2) & 0x1, (val >> 3) & 0x1,
5517 (val >> 4) & 0x1);
5518
5519 val2 = RREG32(base + ch * 0x1000 + 0x060);
5520 dev_err(hdev->dev,
5521 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5522 device, ch * 2,
5523 RREG32(base + ch * 0x1000 + 0x064),
5524 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5525 (val2 & 0xFF0000) >> 16,
5526 (val2 & 0xFF000000) >> 24);
5527 }
5528
5529 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5530 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5531 if (val) {
5532 err = 1;
5533 dev_err(hdev->dev,
5534 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5535 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5536 (val >> 2) & 0x1, (val >> 3) & 0x1,
5537 (val >> 4) & 0x1);
5538
5539 val2 = RREG32(base + ch * 0x1000 + 0x070);
5540 dev_err(hdev->dev,
5541 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5542 device, ch * 2 + 1,
5543 RREG32(base + ch * 0x1000 + 0x074),
5544 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5545 (val2 & 0xFF0000) >> 16,
5546 (val2 & 0xFF000000) >> 24);
5547 }
5548
5549 /* Clear interrupts */
5550 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5551 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5552 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5553 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5554 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5555 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5556 }
5557
5558 val = RREG32(base + 0x8F30);
5559 val2 = RREG32(base + 0x8F34);
5560 if (val | val2) {
5561 err = 1;
5562 dev_err(hdev->dev,
5563 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5564 device, val, val2);
5565 }
5566 val = RREG32(base + 0x8F40);
5567 val2 = RREG32(base + 0x8F44);
5568 if (val | val2) {
5569 err = 1;
5570 dev_err(hdev->dev,
5571 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5572 device, val, val2);
5573 }
5574
5575 return err;
5576}
5577
5578static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5579{
5580 switch (hbm_event_type) {
5581 case GAUDI_EVENT_HBM0_SPI_0:
5582 case GAUDI_EVENT_HBM0_SPI_1:
5583 return 0;
5584 case GAUDI_EVENT_HBM1_SPI_0:
5585 case GAUDI_EVENT_HBM1_SPI_1:
5586 return 1;
5587 case GAUDI_EVENT_HBM2_SPI_0:
5588 case GAUDI_EVENT_HBM2_SPI_1:
5589 return 2;
5590 case GAUDI_EVENT_HBM3_SPI_0:
5591 case GAUDI_EVENT_HBM3_SPI_1:
5592 return 3;
5593 default:
5594 break;
5595 }
5596
5597 /* Should never happen */
5598 return 0;
5599}
5600
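/*
 * Read, log and clear the TPC interrupt cause register. Returns true when
 * the cause is a QMAN error (bit 15), in which case the caller should
 * reset the device.
 */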
5601static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5602 char *interrupt_name)
5603{
5604 struct gaudi_device *gaudi = hdev->asic_specific;
5605 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5606 bool soft_reset_required = false;
5607
5608 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5609 * gating, and thus cannot be done in ArmCP and should be done instead
5610 * by the driver.
5611 */
5612
5613 mutex_lock(&gaudi->clk_gate_mutex);
5614
5615 hdev->asic_funcs->disable_clock_gating(hdev);
5616
5617 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5618 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5619
5620 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5621 if (tpc_interrupts_cause & BIT(i)) {
5622 dev_err_ratelimited(hdev->dev,
5623 "TPC%d_%s interrupt cause: %s\n",
5624 tpc_id, interrupt_name,
5625 gaudi_tpc_interrupts_cause[i]);
5626 /* If this is QM error, we need to soft-reset */
5627 if (i == 15)
5628 soft_reset_required = true;
5629 }
5630
5631 /* Clear interrupts */
5632 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5633
5634 hdev->asic_funcs->enable_clock_gating(hdev);
5635
5636 mutex_unlock(&gaudi->clk_gate_mutex);
5637
5638 return soft_reset_required;
5639}
5640
5641static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5642{
5643 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5644}
5645
5646static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5647{
5648 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5649}
5650
5651static void gaudi_print_clk_change_info(struct hl_device *hdev,
5652 u16 event_type)
5653{
5654 switch (event_type) {
5655 case GAUDI_EVENT_FIX_POWER_ENV_S:
5656 dev_info_ratelimited(hdev->dev,
5657 "Clock throttling due to power consumption\n");
5658 break;
5659
5660 case GAUDI_EVENT_FIX_POWER_ENV_E:
5661 dev_info_ratelimited(hdev->dev,
5662 "Power envelop is safe, back to optimal clock\n");
5663 break;
5664
5665 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5666 dev_info_ratelimited(hdev->dev,
5667 "Clock throttling due to overheating\n");
5668 break;
5669
5670 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5671 dev_info_ratelimited(hdev->dev,
5672 "Thermal envelop is safe, back to optimal clock\n");
5673 break;
5674
5675 default:
5676 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5677 event_type);
5678 break;
5679 }
5680}
5681
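/*
 * Main event-queue handler: extract the event type from the entry control
 * word, update the event statistics and dispatch. Fatal events (double-bit
 * ECC, GIC500, AXI ECC, etc.) trigger a hard reset when
 * hard_reset_on_fw_events is set; recoverable events are logged and their
 * interrupt is unmasked in the firmware.
 */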
5682static void gaudi_handle_eqe(struct hl_device *hdev,
5683 struct hl_eq_entry *eq_entry)
5684{
5685 struct gaudi_device *gaudi = hdev->asic_specific;
5686 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5687 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5688 >> EQ_CTL_EVENT_TYPE_SHIFT);
5689 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03005690 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005691
5692 gaudi->events_stat[event_type]++;
5693 gaudi->events_stat_aggregate[event_type]++;
5694
5695 switch (event_type) {
5696 case GAUDI_EVENT_PCIE_CORE_DERR:
5697 case GAUDI_EVENT_PCIE_IF_DERR:
5698 case GAUDI_EVENT_PCIE_PHY_DERR:
5699 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5700 case GAUDI_EVENT_MME0_ACC_DERR:
5701 case GAUDI_EVENT_MME0_SBAB_DERR:
5702 case GAUDI_EVENT_MME1_ACC_DERR:
5703 case GAUDI_EVENT_MME1_SBAB_DERR:
5704 case GAUDI_EVENT_MME2_ACC_DERR:
5705 case GAUDI_EVENT_MME2_SBAB_DERR:
5706 case GAUDI_EVENT_MME3_ACC_DERR:
5707 case GAUDI_EVENT_MME3_SBAB_DERR:
5708 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5709 fallthrough;
5710 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5711 case GAUDI_EVENT_PSOC_MEM_DERR:
5712 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5713 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5714 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5715 fallthrough;
5716 case GAUDI_EVENT_GIC500:
5717 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5718 case GAUDI_EVENT_MMU_DERR:
5719 case GAUDI_EVENT_AXI_ECC:
5720 case GAUDI_EVENT_L2_RAM_ECC:
5721 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5722 gaudi_print_irq_info(hdev, event_type, false);
5723 if (hdev->hard_reset_on_fw_events)
5724 hl_device_reset(hdev, true, false);
5725 break;
5726
5727 case GAUDI_EVENT_HBM0_SPI_0:
5728 case GAUDI_EVENT_HBM1_SPI_0:
5729 case GAUDI_EVENT_HBM2_SPI_0:
5730 case GAUDI_EVENT_HBM3_SPI_0:
5731 gaudi_print_irq_info(hdev, event_type, false);
5732 gaudi_hbm_read_interrupts(hdev,
5733 gaudi_hbm_event_to_dev(event_type));
5734 if (hdev->hard_reset_on_fw_events)
5735 hl_device_reset(hdev, true, false);
5736 break;
5737
5738 case GAUDI_EVENT_HBM0_SPI_1:
5739 case GAUDI_EVENT_HBM1_SPI_1:
5740 case GAUDI_EVENT_HBM2_SPI_1:
5741 case GAUDI_EVENT_HBM3_SPI_1:
5742 gaudi_print_irq_info(hdev, event_type, false);
5743 gaudi_hbm_read_interrupts(hdev,
5744 gaudi_hbm_event_to_dev(event_type));
5745 break;
5746
5747 case GAUDI_EVENT_TPC0_DEC:
5748 case GAUDI_EVENT_TPC1_DEC:
5749 case GAUDI_EVENT_TPC2_DEC:
5750 case GAUDI_EVENT_TPC3_DEC:
5751 case GAUDI_EVENT_TPC4_DEC:
5752 case GAUDI_EVENT_TPC5_DEC:
5753 case GAUDI_EVENT_TPC6_DEC:
5754 case GAUDI_EVENT_TPC7_DEC:
5755 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005756 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005757 tpc_dec_event_to_tpc_id(event_type),
5758 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03005759 if (reset_required) {
5760 dev_err(hdev->dev, "hard reset required due to %s\n",
5761 gaudi_irq_map_table[event_type].name);
5762
5763 if (hdev->hard_reset_on_fw_events)
5764 hl_device_reset(hdev, true, false);
5765 } else {
5766 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005767 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005768 break;
5769
5770 case GAUDI_EVENT_TPC0_KRN_ERR:
5771 case GAUDI_EVENT_TPC1_KRN_ERR:
5772 case GAUDI_EVENT_TPC2_KRN_ERR:
5773 case GAUDI_EVENT_TPC3_KRN_ERR:
5774 case GAUDI_EVENT_TPC4_KRN_ERR:
5775 case GAUDI_EVENT_TPC5_KRN_ERR:
5776 case GAUDI_EVENT_TPC6_KRN_ERR:
5777 case GAUDI_EVENT_TPC7_KRN_ERR:
5778 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005779 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005780 tpc_krn_event_to_tpc_id(event_type),
5781 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03005782 if (reset_required) {
5783 dev_err(hdev->dev, "hard reset required due to %s\n",
5784 gaudi_irq_map_table[event_type].name);
5785
5786 if (hdev->hard_reset_on_fw_events)
5787 hl_device_reset(hdev, true, false);
5788 } else {
5789 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005790 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005791 break;
5792
5793 case GAUDI_EVENT_PCIE_CORE_SERR:
5794 case GAUDI_EVENT_PCIE_IF_SERR:
5795 case GAUDI_EVENT_PCIE_PHY_SERR:
5796 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5797 case GAUDI_EVENT_MME0_ACC_SERR:
5798 case GAUDI_EVENT_MME0_SBAB_SERR:
5799 case GAUDI_EVENT_MME1_ACC_SERR:
5800 case GAUDI_EVENT_MME1_SBAB_SERR:
5801 case GAUDI_EVENT_MME2_ACC_SERR:
5802 case GAUDI_EVENT_MME2_SBAB_SERR:
5803 case GAUDI_EVENT_MME3_ACC_SERR:
5804 case GAUDI_EVENT_MME3_SBAB_SERR:
5805 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5806 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5807 case GAUDI_EVENT_PSOC_MEM_SERR:
5808 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5809 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5810 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5811 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5812 fallthrough;
5813 case GAUDI_EVENT_MMU_SERR:
5814 case GAUDI_EVENT_PCIE_DEC:
5815 case GAUDI_EVENT_MME0_WBC_RSP:
5816 case GAUDI_EVENT_MME0_SBAB0_RSP:
5817 case GAUDI_EVENT_MME1_WBC_RSP:
5818 case GAUDI_EVENT_MME1_SBAB0_RSP:
5819 case GAUDI_EVENT_MME2_WBC_RSP:
5820 case GAUDI_EVENT_MME2_SBAB0_RSP:
5821 case GAUDI_EVENT_MME3_WBC_RSP:
5822 case GAUDI_EVENT_MME3_SBAB0_RSP:
5823 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5824 case GAUDI_EVENT_PSOC_AXI_DEC:
5825 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5826 case GAUDI_EVENT_MMU_PAGE_FAULT:
5827 case GAUDI_EVENT_MMU_WR_PERM:
5828 case GAUDI_EVENT_RAZWI_OR_ADC:
5829 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5830 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5831 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5832 fallthrough;
5833 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5834 gaudi_print_irq_info(hdev, event_type, true);
5835 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005836 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005837 break;
5838
5839 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5840 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005841 if (hdev->hard_reset_on_fw_events)
5842 hl_device_reset(hdev, true, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005843 break;
5844
5845 case GAUDI_EVENT_TPC0_BMON_SPMU:
5846 case GAUDI_EVENT_TPC1_BMON_SPMU:
5847 case GAUDI_EVENT_TPC2_BMON_SPMU:
5848 case GAUDI_EVENT_TPC3_BMON_SPMU:
5849 case GAUDI_EVENT_TPC4_BMON_SPMU:
5850 case GAUDI_EVENT_TPC5_BMON_SPMU:
5851 case GAUDI_EVENT_TPC6_BMON_SPMU:
5852 case GAUDI_EVENT_TPC7_BMON_SPMU:
5853 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5854 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005855 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005856 break;
5857
5858 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5859 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005860 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005861 break;
5862
5863 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5864 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5865 dev_err(hdev->dev,
5866 "Received high temp H/W interrupt %d (cause %d)\n",
5867 event_type, cause);
5868 break;
5869
5870 default:
5871 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5872 event_type);
5873 break;
5874 }
5875}
5876
5877static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5878 u32 *size)
5879{
5880 struct gaudi_device *gaudi = hdev->asic_specific;
5881
5882 if (aggregate) {
5883 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5884 return gaudi->events_stat_aggregate;
5885 }
5886
5887 *size = (u32) sizeof(gaudi->events_stat);
5888 return gaudi->events_stat;
5889}
5890
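/*
 * Full L0/L1 STLB invalidation: push a new producer index to
 * STLB_CACHE_INV and poll STLB_INV_PS until the MMU reports completion.
 * A timeout is treated as fatal and escalates to a hard reset.
 */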
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005891static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005892 u32 flags)
5893{
5894 struct gaudi_device *gaudi = hdev->asic_specific;
5895 u32 status, timeout_usec;
5896 int rc;
5897
5898 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5899 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005900 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005901
5902 if (hdev->pldm)
5903 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5904 else
5905 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5906
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005907 mutex_lock(&hdev->mmu_cache_lock);
5908
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005909 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03005910 WREG32(mmSTLB_INV_PS, 3);
5911 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005912 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005913
5914 rc = hl_poll_timeout(
5915 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005916 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005917 status,
5918 !status,
5919 1000,
5920 timeout_usec);
5921
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005922 WREG32(mmSTLB_INV_SET, 0);
5923
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005924 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005925
5926 if (rc) {
5927 dev_err_ratelimited(hdev->dev,
5928 "MMU cache invalidation timeout\n");
5929 hl_device_reset(hdev, true, false);
5930 }
5931
5932 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005933}
5934
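/*
 * Range-based MMU cache invalidation. For now this performs a full L0/L1
 * invalidation (see the TODO below) by advancing the 8-bit producer index
 * in STLB_CACHE_INV and polling STLB_INV_CONSUMER_INDEX until it catches
 * up.
 */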
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005935static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005936 bool is_hard, u32 asid, u64 va, u64 size)
5937{
5938 struct gaudi_device *gaudi = hdev->asic_specific;
5939 u32 status, timeout_usec;
5940 u32 inv_data;
5941 u32 pi;
5942 int rc;
5943
5944 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5945 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005946 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005947
5948 mutex_lock(&hdev->mmu_cache_lock);
5949
5950 if (hdev->pldm)
5951 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5952 else
5953 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5954
5955 /*
5956 * TODO: currently invalidate entire L0 & L1 as in regular hard
5957 * invalidation. Need to apply invalidation of specific cache
5958 * lines with mask of ASID & VA & size.
5959 * Note that L1 will be flushed entirely in any case.
5960 */
5961
5962 /* L0 & L1 invalidation */
5963 inv_data = RREG32(mmSTLB_CACHE_INV);
5964 /* PI is 8 bit */
5965 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5966 WREG32(mmSTLB_CACHE_INV,
5967 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5968
5969 rc = hl_poll_timeout(
5970 hdev,
5971 mmSTLB_INV_CONSUMER_INDEX,
5972 status,
5973 status == pi,
5974 1000,
5975 timeout_usec);
5976
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005977 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005978
5979 if (rc) {
5980 dev_err_ratelimited(hdev->dev,
5981 "MMU cache invalidation timeout\n");
5982 hl_device_reset(hdev, true, false);
5983 }
5984
5985 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005986}
5987
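/*
 * Program the hop-0 page-table physical address of an ASID: write the
 * ASID and the physical address (bits 43:12 and 49:44 in two registers),
 * kick the MMU via MMU_BUSY and poll until the busy bit clears.
 */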
5988static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5989 u32 asid, u64 phys_addr)
5990{
5991 u32 status, timeout_usec;
5992 int rc;
5993
5994 if (hdev->pldm)
5995 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5996 else
5997 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5998
5999 WREG32(MMU_ASID, asid);
6000 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6001 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6002 WREG32(MMU_BUSY, 0x80000000);
6003
6004 rc = hl_poll_timeout(
6005 hdev,
6006 MMU_BUSY,
6007 status,
6008 !(status & 0x80000000),
6009 1000,
6010 timeout_usec);
6011
6012 if (rc) {
6013 dev_err(hdev->dev,
6014 "Timeout during MMU hop0 config of asid %d\n", asid);
6015 return rc;
6016 }
6017
6018 return 0;
6019}
6020
6021static int gaudi_send_heartbeat(struct hl_device *hdev)
6022{
6023 struct gaudi_device *gaudi = hdev->asic_specific;
6024
6025 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6026 return 0;
6027
6028 return hl_fw_send_heartbeat(hdev);
6029}
6030
6031static int gaudi_armcp_info_get(struct hl_device *hdev)
6032{
6033 struct gaudi_device *gaudi = hdev->asic_specific;
6034 struct asic_fixed_properties *prop = &hdev->asic_prop;
6035 int rc;
6036
6037 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6038 return 0;
6039
6040 rc = hl_fw_armcp_info_get(hdev);
6041 if (rc)
6042 return rc;
6043
6044 if (!strlen(prop->armcp_info.card_name))
6045 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6046 CARD_NAME_MAX_LEN);
6047
6048 return 0;
6049}
6050
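/*
 * Report whether all DMA/TPC/MME engines are idle, based on the QMAN
 * GLBL_STS0/CGM_STS registers and the per-engine status registers.
 * Optionally fills a per-engine busy mask and dumps a status table to the
 * given seq_file.
 */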
6051static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
6052 struct seq_file *s)
6053{
6054 struct gaudi_device *gaudi = hdev->asic_specific;
6055 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6056 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6057 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6058 bool is_idle = true, is_eng_idle, is_slave;
6059 u64 offset;
6060 int i, dma_id;
6061
6062 mutex_lock(&gaudi->clk_gate_mutex);
6063
6064 hdev->asic_funcs->disable_clock_gating(hdev);
6065
6066 if (s)
6067 seq_puts(s,
6068 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6069 "--- ------- ------------ ---------- -------------\n");
6070
6071 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6072 dma_id = gaudi_dma_assignment[i];
6073 offset = dma_id * DMA_QMAN_OFFSET;
6074
6075 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6076 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6077 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6078 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6079 IS_DMA_IDLE(dma_core_sts0);
6080 is_idle &= is_eng_idle;
6081
6082 if (mask)
6083 *mask |= !is_eng_idle <<
6084 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6085 if (s)
6086 seq_printf(s, fmt, dma_id,
6087 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6088 qm_cgm_sts, dma_core_sts0);
6089 }
6090
6091 if (s)
6092 seq_puts(s,
6093 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6094 "--- ------- ------------ ---------- ----------\n");
6095
6096 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6097 offset = i * TPC_QMAN_OFFSET;
6098 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6099 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6100 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6101 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6102 IS_TPC_IDLE(tpc_cfg_sts);
6103 is_idle &= is_eng_idle;
6104
6105 if (mask)
6106 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
6107 if (s)
6108 seq_printf(s, fmt, i,
6109 is_eng_idle ? "Y" : "N",
6110 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6111 }
6112
6113 if (s)
6114 seq_puts(s,
6115 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6116 "--- ------- ------------ ---------- -----------\n");
6117
6118 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6119 offset = i * MME_QMAN_OFFSET;
6120 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6121 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6122
6123 /* MME 1 & 3 are slaves, no need to check their QMANs */
6124 is_slave = i % 2;
6125 if (!is_slave) {
6126 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6127 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6128 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6129 }
6130
6131 is_idle &= is_eng_idle;
6132
6133 if (mask)
6134 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
6135 if (s) {
6136 if (!is_slave)
6137 seq_printf(s, fmt, i,
6138 is_eng_idle ? "Y" : "N",
6139 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6140 else
6141 seq_printf(s, mme_slave_fmt, i,
6142 is_eng_idle ? "Y" : "N", "-",
6143 "-", mme_arch_sts);
6144 }
6145 }
6146
6147 if (s)
6148 seq_puts(s, "\n");
6149
6150 hdev->asic_funcs->enable_clock_gating(hdev);
6151
6152 mutex_unlock(&gaudi->clk_gate_mutex);
6153
6154 return is_idle;
6155}
6156
6157static void gaudi_hw_queues_lock(struct hl_device *hdev)
6158 __acquires(&gaudi->hw_queues_lock)
6159{
6160 struct gaudi_device *gaudi = hdev->asic_specific;
6161
6162 spin_lock(&gaudi->hw_queues_lock);
6163}
6164
6165static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6166 __releases(&gaudi->hw_queues_lock)
6167{
6168 struct gaudi_device *gaudi = hdev->asic_specific;
6169
6170 spin_unlock(&gaudi->hw_queues_lock);
6171}
6172
6173static u32 gaudi_get_pci_id(struct hl_device *hdev)
6174{
6175 return hdev->pdev->device;
6176}
6177
6178static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6179 size_t max_size)
6180{
6181 struct gaudi_device *gaudi = hdev->asic_specific;
6182
6183 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6184 return 0;
6185
6186 return hl_fw_get_eeprom_data(hdev, data, max_size);
6187}
6188
6189/*
6190 * this function should be used only during initialization and/or after reset,
6191 * when there are no active users.
6192 */
6193static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6194 u32 tpc_id)
6195{
6196 struct gaudi_device *gaudi = hdev->asic_specific;
6197 u64 kernel_timeout;
6198 u32 status, offset;
6199 int rc;
6200
6201 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6202
6203 if (hdev->pldm)
6204 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6205 else
6206 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6207
6208 mutex_lock(&gaudi->clk_gate_mutex);
6209
6210 hdev->asic_funcs->disable_clock_gating(hdev);
6211
6212 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6213 lower_32_bits(tpc_kernel));
6214 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6215 upper_32_bits(tpc_kernel));
6216
6217 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6218 lower_32_bits(tpc_kernel));
6219 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6220 upper_32_bits(tpc_kernel));
6221 /* set a valid LUT pointer, content is of no significance */
6222 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6223 lower_32_bits(tpc_kernel));
6224 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6225 upper_32_bits(tpc_kernel));
6226
6227 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6228 lower_32_bits(CFG_BASE +
6229 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6230
6231 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6232 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6233 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6234 /* wait a bit for the engine to start executing */
6235 usleep_range(1000, 1500);
6236
6237 /* wait until engine has finished executing */
6238 rc = hl_poll_timeout(
6239 hdev,
6240 mmTPC0_CFG_STATUS + offset,
6241 status,
6242 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6243 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6244 1000,
6245 kernel_timeout);
6246
6247 if (rc) {
6248 dev_err(hdev->dev,
6249 "Timeout while waiting for TPC%d icache prefetch\n",
6250 tpc_id);
6251 hdev->asic_funcs->enable_clock_gating(hdev);
6252 mutex_unlock(&gaudi->clk_gate_mutex);
6253 return -EIO;
6254 }
6255
6256 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6257 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6258
6259 /* wait a bit for the engine to start executing */
6260 usleep_range(1000, 1500);
6261
6262 /* wait until engine has finished executing */
6263 rc = hl_poll_timeout(
6264 hdev,
6265 mmTPC0_CFG_STATUS + offset,
6266 status,
6267 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6268 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6269 1000,
6270 kernel_timeout);
6271
6272 rc = hl_poll_timeout(
6273 hdev,
6274 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6275 status,
6276 (status == 0),
6277 1000,
6278 kernel_timeout);
6279
6280 hdev->asic_funcs->enable_clock_gating(hdev);
6281 mutex_unlock(&gaudi->clk_gate_mutex);
6282
6283 if (rc) {
6284 dev_err(hdev->dev,
6285 "Timeout while waiting for TPC%d kernel to execute\n",
6286 tpc_id);
6287 return -EIO;
6288 }
6289
6290 return 0;
6291}
6292
6293static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6294{
6295 return RREG32(mmHW_STATE);
6296}
6297
6298static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6299{
6300 return gaudi_cq_assignment[cq_idx];
6301}
6302
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006303static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6304{
6305 return sizeof(struct packet_msg_short) +
6306 sizeof(struct packet_msg_prot) * 2;
6307}
6308
6309static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6310{
6311 return sizeof(struct packet_msg_short) * 4 +
6312 sizeof(struct packet_fence) +
6313 sizeof(struct packet_msg_prot) * 2;
6314}
6315
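/*
 * Build a signal CB: a single MSG_SHORT packet that adds 1 to the given
 * sync object (SOB) in the W_S sync manager, with the EB/RB/MB barrier
 * bits set.
 */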
6316static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6317{
6318 struct hl_cb *cb = (struct hl_cb *) data;
6319 struct packet_msg_short *pkt;
6320 u32 value, ctl;
6321
6322 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6323 memset(pkt, 0, sizeof(*pkt));
6324
6325 value = 1 << GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT; /* inc by 1 */
6326 value |= 1 << GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT; /* add mode */
6327
6328 ctl = (sob_id * 4) << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; /* SOB id */
6329 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6330 ctl |= 3 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S SOB base */
6331 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6332 ctl |= 1 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6333 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6334 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6335
6336 pkt->value = cpu_to_le32(value);
6337 pkt->ctl = cpu_to_le32(ctl);
6338}
6339
6340static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6341 u16 addr)
6342{
6343 u32 ctl, pkt_size = sizeof(*pkt);
6344
6345 memset(pkt, 0, pkt_size);
6346
6347 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6348 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6349 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6350 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6351 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6352 ctl |= 0 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; /* only last pkt needs MB */
6353
6354 pkt->value = cpu_to_le32(value);
6355 pkt->ctl = cpu_to_le32(ctl);
6356
6357 return pkt_size;
6358}
6359
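/*
 * ARM a monitor on a sync object: the sync group is sob_id / 8, the 8-bit
 * mask clears only the target SOB's bit within that group, and the
 * comparison mode is GREATER_OR_EQUAL against sob_val.
 */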
6360static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6361 u16 sob_val, u16 addr)
6362{
6363 u32 ctl, value, pkt_size = sizeof(*pkt);
6364 u8 mask = ~(1 << (sob_id & 0x7));
6365
6366 memset(pkt, 0, pkt_size);
6367
6368 value = (sob_id / 8) << GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_SHIFT;
6369 value |= sob_val << GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_SHIFT;
6370 value |= 0 << GAUDI_PKT_SHORT_VAL_MON_MODE_SHIFT; /* GREATER_OR_EQUAL */
6371 value |= mask << GAUDI_PKT_SHORT_VAL_MON_MASK_SHIFT;
6372
6373 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6374 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6375 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6376 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6377 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6378 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6379 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6380
6381 pkt->value = cpu_to_le32(value);
6382 pkt->ctl = cpu_to_le32(ctl);
6383
6384 return pkt_size;
6385}
6386
6387static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6388{
6389 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6390
6391 memset(pkt, 0, pkt_size);
6392
6393 cfg = 1 << GAUDI_PKT_FENCE_CFG_DEC_VAL_SHIFT;
6394 cfg |= 1 << GAUDI_PKT_FENCE_CFG_TARGET_VAL_SHIFT;
6395 cfg |= 2 << GAUDI_PKT_FENCE_CFG_ID_SHIFT;
6396
6397 ctl = 0 << GAUDI_PKT_FENCE_CTL_PRED_SHIFT;
6398 ctl |= PACKET_FENCE << GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT;
6399 ctl |= 0 << GAUDI_PKT_FENCE_CTL_EB_SHIFT;
6400 ctl |= 1 << GAUDI_PKT_FENCE_CTL_RB_SHIFT;
6401 ctl |= 1 << GAUDI_PKT_FENCE_CTL_MB_SHIFT;
6402
6403 pkt->cfg = cpu_to_le32(cfg);
6404 pkt->ctl = cpu_to_le32(ctl);
6405
6406 return pkt_size;
6407}
6408
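/*
 * Build a wait CB for the given queue: four MSG_SHORT packets configure a
 * sync manager monitor (payload address low/high pointing at the queue's
 * CP_FENCE2_RDATA register, payload data of 1, and the ARM register with
 * the SOB group, target value and mask), followed by a FENCE packet that
 * makes the CP wait for that payload to arrive.
 */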
6409static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6410 u16 sob_val, u16 mon_id, u32 q_idx)
6411{
6412 struct hl_cb *cb = (struct hl_cb *) data;
6413 void *buf = (void *) (uintptr_t) cb->kernel_address;
6414 u64 monitor_base, fence_addr = 0;
6415 u32 size = 0;
6416 u16 msg_addr_offset;
6417
6418 switch (q_idx) {
6419 case GAUDI_QUEUE_ID_DMA_0_0:
6420 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6421 break;
6422 case GAUDI_QUEUE_ID_DMA_0_1:
6423 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6424 break;
6425 case GAUDI_QUEUE_ID_DMA_0_2:
6426 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6427 break;
6428 case GAUDI_QUEUE_ID_DMA_0_3:
6429 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6430 break;
6431 case GAUDI_QUEUE_ID_DMA_1_0:
6432 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6433 break;
6434 case GAUDI_QUEUE_ID_DMA_1_1:
6435 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6436 break;
6437 case GAUDI_QUEUE_ID_DMA_1_2:
6438 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6439 break;
6440 case GAUDI_QUEUE_ID_DMA_1_3:
6441 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6442 break;
6443 case GAUDI_QUEUE_ID_DMA_5_0:
6444 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6445 break;
6446 case GAUDI_QUEUE_ID_DMA_5_1:
6447 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6448 break;
6449 case GAUDI_QUEUE_ID_DMA_5_2:
6450 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6451 break;
6452 case GAUDI_QUEUE_ID_DMA_5_3:
6453 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6454 break;
6455 default:
6456 /* queue index should be valid here */
6457 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6458 q_idx);
6459 return;
6460 }
6461
6462 fence_addr += CFG_BASE;
6463
6464 /*
6465 * monitor_base should be the content of the base0 address registers,
6466 * so it will be added to the msg short offsets
6467 */
6468 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6469
6470 /* First monitor config packet: low address of the sync */
6471 msg_addr_offset =
6472 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6473 monitor_base;
6474
6475 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6476 msg_addr_offset);
6477
6478 /* Second monitor config packet: high address of the sync */
6479 msg_addr_offset =
6480 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6481 monitor_base;
6482
6483 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6484 msg_addr_offset);
6485
6486 /*
6487 * Third monitor config packet: the payload, i.e. what to write when the
6488 * sync triggers
6489 */
6490 msg_addr_offset =
6491 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6492 monitor_base;
6493
6494 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6495
6496 /* Fourth monitor config packet: bind the monitor to a sync object */
6497 msg_addr_offset =
6498 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6499 monitor_base;
6500 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6501 msg_addr_offset);
6502
6503 /* Fence packet */
6504 size += gaudi_add_fence_pkt(buf + size);
6505}
6506
6507static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6508{
6509 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6510
6511 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6512 hw_sob->sob_id);
6513
6514 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6515 0);
6516
6517 kref_init(&hw_sob->kref);
6518}
6519
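/*
 * Select the DMA mask according to the POWER9 host magic in the non-reset
 * scratchpad register: if present, 64-bit DMA addressing is enabled,
 * otherwise 48-bit is used.
 */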
6520static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6521{
6522 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6523 HL_POWER9_HOST_MAGIC) {
6524 hdev->power9_64bit_dma_enable = 1;
6525 hdev->dma_mask = 64;
6526 } else {
6527 hdev->power9_64bit_dma_enable = 0;
6528 hdev->dma_mask = 48;
6529 }
6530}
6531
6532static u64 gaudi_get_device_time(struct hl_device *hdev)
6533{
6534 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6535
6536 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6537}
6538
6539static const struct hl_asic_funcs gaudi_funcs = {
6540 .early_init = gaudi_early_init,
6541 .early_fini = gaudi_early_fini,
6542 .late_init = gaudi_late_init,
6543 .late_fini = gaudi_late_fini,
6544 .sw_init = gaudi_sw_init,
6545 .sw_fini = gaudi_sw_fini,
6546 .hw_init = gaudi_hw_init,
6547 .hw_fini = gaudi_hw_fini,
6548 .halt_engines = gaudi_halt_engines,
6549 .suspend = gaudi_suspend,
6550 .resume = gaudi_resume,
6551 .cb_mmap = gaudi_cb_mmap,
6552 .ring_doorbell = gaudi_ring_doorbell,
6553 .pqe_write = gaudi_pqe_write,
6554 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6555 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6556 .get_int_queue_base = gaudi_get_int_queue_base,
6557 .test_queues = gaudi_test_queues,
6558 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6559 .asic_dma_pool_free = gaudi_dma_pool_free,
6560 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6561 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6562 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6563 .cs_parser = gaudi_cs_parser,
6564 .asic_dma_map_sg = gaudi_dma_map_sg,
6565 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6566 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6567 .update_eq_ci = gaudi_update_eq_ci,
6568 .context_switch = gaudi_context_switch,
6569 .restore_phase_topology = gaudi_restore_phase_topology,
6570 .debugfs_read32 = gaudi_debugfs_read32,
6571 .debugfs_write32 = gaudi_debugfs_write32,
6572 .debugfs_read64 = gaudi_debugfs_read64,
6573 .debugfs_write64 = gaudi_debugfs_write64,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006574 .add_device_attr = gaudi_add_device_attr,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006575 .handle_eqe = gaudi_handle_eqe,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006576 .set_pll_profile = gaudi_set_pll_profile,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006577 .get_events_stat = gaudi_get_events_stat,
6578 .read_pte = gaudi_read_pte,
6579 .write_pte = gaudi_write_pte,
6580 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6581 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6582 .send_heartbeat = gaudi_send_heartbeat,
6583 .enable_clock_gating = gaudi_enable_clock_gating,
6584 .disable_clock_gating = gaudi_disable_clock_gating,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03006585 .debug_coresight = gaudi_debug_coresight,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006586 .is_device_idle = gaudi_is_device_idle,
6587 .soft_reset_late_init = gaudi_soft_reset_late_init,
6588 .hw_queues_lock = gaudi_hw_queues_lock,
6589 .hw_queues_unlock = gaudi_hw_queues_unlock,
6590 .get_pci_id = gaudi_get_pci_id,
6591 .get_eeprom_data = gaudi_get_eeprom_data,
6592 .send_cpu_message = gaudi_send_cpu_message,
6593 .get_hw_state = gaudi_get_hw_state,
6594 .pci_bars_map = gaudi_pci_bars_map,
6595 .set_dram_bar_base = gaudi_set_hbm_bar_base,
6596 .init_iatu = gaudi_init_iatu,
6597 .rreg = hl_rreg,
6598 .wreg = hl_wreg,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03006599 .halt_coresight = gaudi_halt_coresight,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006600 .get_clk_rate = gaudi_get_clk_rate,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006601 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6602 .read_device_fw_version = gaudi_read_device_fw_version,
6603 .load_firmware_to_device = gaudi_load_firmware_to_device,
6604 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006605 .get_signal_cb_size = gaudi_get_signal_cb_size,
6606 .get_wait_cb_size = gaudi_get_wait_cb_size,
6607 .gen_signal_cb = gaudi_gen_signal_cb,
6608 .gen_wait_cb = gaudi_gen_wait_cb,
6609 .reset_sob = gaudi_reset_sob,
6610 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6611 .get_device_time = gaudi_get_device_time
6612};
6613
6614/**
6615 * gaudi_set_asic_funcs - set GAUDI function pointers
6616 *
Lee Jonesf7d227c2020-07-01 09:58:42 +01006617 * @hdev: pointer to hl_device structure
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006618 *
6619 */
6620void gaudi_set_asic_funcs(struct hl_device *hdev)
6621{
6622 hdev->asic_funcs = &gaudi_funcs;
6623}