 1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "gaudiP.h"
9#include "include/hw_ip/mmu/mmu_general.h"
10#include "include/hw_ip/mmu/mmu_v1_1.h"
11#include "include/gaudi/gaudi_masks.h"
12#include "include/gaudi/gaudi_fw_if.h"
13#include "include/gaudi/gaudi_reg_map.h"
 14#include "include/gaudi/gaudi_async_ids_map_extended.h"
 15
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/genalloc.h>
21#include <linux/io-64-nonatomic-lo-hi.h>
22#include <linux/iommu.h>
23#include <linux/seq_file.h>
24
25/*
26 * Gaudi security scheme:
27 *
28 * 1. Host is protected by:
29 * - Range registers
30 * - MMU
31 *
32 * 2. DDR is protected by:
33 * - Range registers (protect the first 512MB)
34 *
35 * 3. Configuration is protected by:
36 * - Range registers
37 * - Protection bits
38 *
39 * MMU is always enabled.
40 *
 41 * QMAN DMA channels 0,1,5 (PCI DMA):
42 * - DMA is not secured.
43 * - PQ and CQ are secured.
44 * - CP is secured: The driver needs to parse CB but WREG should be allowed
 45 * because of TDMA (tensor DMA). Hence, WREG is never
 46 * secured.
47 *
48 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49 * channel 0 to be secured, execute the DMA and change it back to not secured.
50 * Currently, the driver doesn't use the DMA while there are compute jobs
51 * running.
52 *
53 * The current use cases for the driver to use the DMA are:
54 * - Clear SRAM on context switch (happens on context switch when device is
55 * idle)
56 * - MMU page tables area clear (happens on init)
57 *
58 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60 * CQ, CP and the engine are not secured
61 *
62 */
63
64#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
65#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
66#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
67
68#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
69
70#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
71#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
72#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
73#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
74
75#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
76#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
77#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC 14000 /* 14s */
78#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
79#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
80#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
 83#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
 84
85#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
86
87#define GAUDI_MAX_STRING_LEN 20
88
89#define GAUDI_CB_POOL_CB_CNT 512
90#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
91
92#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
93
94#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
95
96#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
97
98#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
99
 100#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
 101
 102#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
103 BIT(GAUDI_ENGINE_ID_MME_0) |\
104 BIT(GAUDI_ENGINE_ID_MME_2) |\
105 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
106
 107static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
108 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
109 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
110 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
111 "gaudi cpu eq"
112};
113
114static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
 115	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
116 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
117 [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
118 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
119 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
120 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
121 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
122 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
 123};
124
125static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
126 [0] = GAUDI_QUEUE_ID_DMA_0_0,
127 [1] = GAUDI_QUEUE_ID_DMA_0_1,
128 [2] = GAUDI_QUEUE_ID_DMA_0_2,
129 [3] = GAUDI_QUEUE_ID_DMA_0_3,
130 [4] = GAUDI_QUEUE_ID_DMA_1_0,
131 [5] = GAUDI_QUEUE_ID_DMA_1_1,
132 [6] = GAUDI_QUEUE_ID_DMA_1_2,
133 [7] = GAUDI_QUEUE_ID_DMA_1_3,
134 [8] = GAUDI_QUEUE_ID_DMA_5_0,
135 [9] = GAUDI_QUEUE_ID_DMA_5_1,
136 [10] = GAUDI_QUEUE_ID_DMA_5_2,
137 [11] = GAUDI_QUEUE_ID_DMA_5_3
138};
139
140static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
141 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
142 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
143 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
144 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
145 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
146 [PACKET_REPEAT] = sizeof(struct packet_repeat),
147 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
148 [PACKET_FENCE] = sizeof(struct packet_fence),
149 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
150 [PACKET_NOP] = sizeof(struct packet_nop),
151 [PACKET_STOP] = sizeof(struct packet_stop),
152 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
153 [PACKET_WAIT] = sizeof(struct packet_wait),
154 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
155};
156
 157static const char * const
158gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
159 "tpc_address_exceed_slm",
160 "tpc_div_by_0",
161 "tpc_spu_mac_overflow",
162 "tpc_spu_addsub_overflow",
163 "tpc_spu_abs_overflow",
164 "tpc_spu_fp_dst_nan_inf",
165 "tpc_spu_fp_dst_denorm",
166 "tpc_vpu_mac_overflow",
167 "tpc_vpu_addsub_overflow",
168 "tpc_vpu_abs_overflow",
169 "tpc_vpu_fp_dst_nan_inf",
170 "tpc_vpu_fp_dst_denorm",
171 "tpc_assertions",
172 "tpc_illegal_instruction",
173 "tpc_pc_wrap_around",
174 "tpc_qm_sw_err",
175 "tpc_hbw_rresp_err",
176 "tpc_hbw_bresp_err",
177 "tpc_lbw_rresp_err",
178 "tpc_lbw_bresp_err"
179};
180
181static const char * const
182gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
183 "PQ AXI HBW error",
184 "CQ AXI HBW error",
185 "CP AXI HBW error",
186 "CP error due to undefined OPCODE",
187 "CP encountered STOP OPCODE",
188 "CP AXI LBW error",
189 "CP WRREG32 or WRBULK returned error",
190 "N/A",
191 "FENCE 0 inc over max value and clipped",
192 "FENCE 1 inc over max value and clipped",
193 "FENCE 2 inc over max value and clipped",
194 "FENCE 3 inc over max value and clipped",
195 "FENCE 0 dec under min value and clipped",
196 "FENCE 1 dec under min value and clipped",
197 "FENCE 2 dec under min value and clipped",
198 "FENCE 3 dec under min value and clipped"
199};
200
201static const char * const
202gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
203 "Choice push while full error",
204 "Choice Q watchdog error",
205 "MSG AXI LBW returned with error"
206};
207
208static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
209 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
210 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
211 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
212 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
213 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
214 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
215 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
216 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
217 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
218 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
219 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
220 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
221 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
222 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
223 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
224 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
225 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
226 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
227 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
228 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
229 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
230 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
231 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
234 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
235 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
236 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
237 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
238 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
239 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
282 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
283 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
284 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
285 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
286 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
287 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
288 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
289 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
290 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
291 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
292 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
293 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
294 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
295 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
296 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
297 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
298 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
299 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
300 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
301 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
302 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
303 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
304 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
316 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
317 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
318 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
319 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
320 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
321 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
322};
323
324static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
325 u64 phys_addr);
326static int gaudi_send_job_on_qman0(struct hl_device *hdev,
327 struct hl_cs_job *job);
328static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
329 u32 size, u64 val);
330static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
331 u32 tpc_id);
332static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
333static int gaudi_armcp_info_get(struct hl_device *hdev);
334static void gaudi_disable_clock_gating(struct hl_device *hdev);
335static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
336
337static int gaudi_get_fixed_properties(struct hl_device *hdev)
338{
339 struct asic_fixed_properties *prop = &hdev->asic_prop;
340 int i;
341
342 if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
343 dev_err(hdev->dev,
344 "Number of H/W queues must be smaller than %d\n",
345 HL_MAX_QUEUES);
346 return -EFAULT;
347 }
348
349 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
350 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
351 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
352 prop->hw_queues_props[i].driver_only = 0;
353 prop->hw_queues_props[i].requires_kernel_cb = 1;
354 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
355 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
356 prop->hw_queues_props[i].driver_only = 1;
357 prop->hw_queues_props[i].requires_kernel_cb = 0;
358 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
359 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
360 prop->hw_queues_props[i].driver_only = 0;
361 prop->hw_queues_props[i].requires_kernel_cb = 0;
362 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
363 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
364 prop->hw_queues_props[i].driver_only = 0;
365 prop->hw_queues_props[i].requires_kernel_cb = 0;
366 }
367 }
368
369 for (; i < HL_MAX_QUEUES; i++)
370 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
371
372 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
373
374 prop->dram_base_address = DRAM_PHYS_BASE;
375 prop->dram_size = GAUDI_HBM_SIZE_32GB;
376 prop->dram_end_address = prop->dram_base_address +
377 prop->dram_size;
378 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
379
380 prop->sram_base_address = SRAM_BASE_ADDR;
381 prop->sram_size = SRAM_SIZE;
382 prop->sram_end_address = prop->sram_base_address +
383 prop->sram_size;
384 prop->sram_user_base_address = prop->sram_base_address +
385 SRAM_USER_BASE_OFFSET;
386
387 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
388 if (hdev->pldm)
389 prop->mmu_pgt_size = 0x800000; /* 8MB */
390 else
391 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
392 prop->mmu_pte_size = HL_PTE_SIZE;
393 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
394 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
395 prop->dram_page_size = PAGE_SIZE_2MB;
396
397 prop->pmmu.hop0_shift = HOP0_SHIFT;
398 prop->pmmu.hop1_shift = HOP1_SHIFT;
399 prop->pmmu.hop2_shift = HOP2_SHIFT;
400 prop->pmmu.hop3_shift = HOP3_SHIFT;
401 prop->pmmu.hop4_shift = HOP4_SHIFT;
402 prop->pmmu.hop0_mask = HOP0_MASK;
403 prop->pmmu.hop1_mask = HOP1_MASK;
404 prop->pmmu.hop2_mask = HOP2_MASK;
405 prop->pmmu.hop3_mask = HOP3_MASK;
406 prop->pmmu.hop4_mask = HOP4_MASK;
407 prop->pmmu.start_addr = VA_HOST_SPACE_START;
408 prop->pmmu.end_addr =
409 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
410 prop->pmmu.page_size = PAGE_SIZE_4KB;
411
 412	/* PMMU and HPMMU are the same except for the page size */
413 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
414 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
415
416 /* shifts and masks are the same in PMMU and DMMU */
417 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
418 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
419 prop->dmmu.end_addr = VA_HOST_SPACE_END;
420 prop->dmmu.page_size = PAGE_SIZE_2MB;
421
422 prop->cfg_size = CFG_SIZE;
423 prop->max_asid = MAX_ASID;
424 prop->num_of_events = GAUDI_EVENT_SIZE;
425 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
426
427 prop->max_power_default = MAX_POWER_DEFAULT;
428
429 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
430 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
431
432 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
433 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
434
435 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
436 CARD_NAME_MAX_LEN);
437
438 return 0;
439}
440
441static int gaudi_pci_bars_map(struct hl_device *hdev)
442{
443 static const char * const name[] = {"SRAM", "CFG", "HBM"};
444 bool is_wc[3] = {false, false, true};
445 int rc;
446
447 rc = hl_pci_bars_map(hdev, name, is_wc);
448 if (rc)
449 return rc;
450
451 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
452 (CFG_BASE - SPI_FLASH_BASE_ADDR);
453
454 return 0;
455}
456
457static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
458{
459 struct gaudi_device *gaudi = hdev->asic_specific;
460 u64 old_addr = addr;
461 int rc;
462
463 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
464 return old_addr;
465
466 /* Inbound Region 2 - Bar 4 - Point to HBM */
467 rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
468 if (rc)
469 return U64_MAX;
470
471 if (gaudi) {
472 old_addr = gaudi->hbm_bar_cur_addr;
473 gaudi->hbm_bar_cur_addr = addr;
474 }
475
476 return old_addr;
477}
478
479static int gaudi_init_iatu(struct hl_device *hdev)
480{
481 int rc = 0;
482
483 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
484 rc = hl_pci_iatu_write(hdev, 0x314,
485 lower_32_bits(SPI_FLASH_BASE_ADDR));
486 rc |= hl_pci_iatu_write(hdev, 0x318,
487 upper_32_bits(SPI_FLASH_BASE_ADDR));
488 rc |= hl_pci_iatu_write(hdev, 0x300, 0);
489 /* Enable + Bar match + match enable */
490 rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
491
492 if (rc)
493 return -EIO;
494
495 return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
496 HOST_PHYS_BASE, HOST_PHYS_SIZE);
497}
498
499static int gaudi_early_init(struct hl_device *hdev)
500{
501 struct asic_fixed_properties *prop = &hdev->asic_prop;
502 struct pci_dev *pdev = hdev->pdev;
503 int rc;
504
505 rc = gaudi_get_fixed_properties(hdev);
506 if (rc) {
507 dev_err(hdev->dev, "Failed to get fixed properties\n");
508 return rc;
509 }
510
511 /* Check BAR sizes */
512 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
513 dev_err(hdev->dev,
514 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
515 SRAM_BAR_ID,
516 (unsigned long long) pci_resource_len(pdev,
517 SRAM_BAR_ID),
518 SRAM_BAR_SIZE);
519 return -ENODEV;
520 }
521
522 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
523 dev_err(hdev->dev,
524 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
525 CFG_BAR_ID,
526 (unsigned long long) pci_resource_len(pdev,
527 CFG_BAR_ID),
528 CFG_BAR_SIZE);
529 return -ENODEV;
530 }
531
532 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
533
534 rc = hl_pci_init(hdev);
535 if (rc)
536 return rc;
537
538 return 0;
539}
540
541static int gaudi_early_fini(struct hl_device *hdev)
542{
543 hl_pci_fini(hdev);
544
545 return 0;
546}
547
548/**
549 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
550 *
551 * @hdev: pointer to hl_device structure
552 *
553 */
554static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
555{
556 struct asic_fixed_properties *prop = &hdev->asic_prop;
557
558 prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
559 prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
560 prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
561 prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
562}
563
564static int _gaudi_init_tpc_mem(struct hl_device *hdev,
565 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
566{
567 struct asic_fixed_properties *prop = &hdev->asic_prop;
568 struct packet_lin_dma *init_tpc_mem_pkt;
569 struct hl_cs_job *job;
570 struct hl_cb *cb;
571 u64 dst_addr;
572 u32 cb_size, ctl;
573 u8 tpc_id;
574 int rc;
575
576 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
577 if (!cb)
578 return -EFAULT;
579
580 init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
581 cb->kernel_address;
582 cb_size = sizeof(*init_tpc_mem_pkt);
583 memset(init_tpc_mem_pkt, 0, cb_size);
584
585 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
586
587 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
588 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
589 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
590 (1 << GAUDI_PKT_CTL_MB_SHIFT));
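	/*
	 * i.e. (per the shift names above) a linear DMA descriptor with the
	 * RB and MB barrier bits set.
	 */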
591
592 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
593
594 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
595 dst_addr = (prop->sram_user_base_address &
596 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
597 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
598 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
599
600 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
601 if (!job) {
602 dev_err(hdev->dev, "Failed to allocate a new job\n");
603 rc = -ENOMEM;
604 goto release_cb;
605 }
606
607 job->id = 0;
608 job->user_cb = cb;
609 job->user_cb->cs_cnt++;
610 job->user_cb_size = cb_size;
611 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
612 job->patched_cb = job->user_cb;
613 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
614
615 hl_debugfs_add_job(hdev, job);
616
617 rc = gaudi_send_job_on_qman0(hdev, job);
618
619 if (rc)
620 goto free_job;
621
622 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
623 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
624 if (rc)
625 break;
626 }
627
628free_job:
629 hl_userptr_delete_list(hdev, &job->userptr_list);
630 hl_debugfs_remove_job(hdev, job);
631 kfree(job);
632 cb->cs_cnt--;
633
634release_cb:
635 hl_cb_put(cb);
636 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
637
638 return rc;
639}
640
641/*
642 * gaudi_init_tpc_mem() - Initialize TPC memories.
643 * @hdev: Pointer to hl_device structure.
644 *
645 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
646 *
647 * Return: 0 for success, negative value for error.
648 */
649static int gaudi_init_tpc_mem(struct hl_device *hdev)
650{
651 const struct firmware *fw;
652 size_t fw_size;
653 void *cpu_addr;
654 dma_addr_t dma_handle;
655 int rc;
656
657 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
658 if (rc) {
659 dev_err(hdev->dev, "Firmware file %s is not found!\n",
660 GAUDI_TPC_FW_FILE);
661 goto out;
662 }
663
664 fw_size = fw->size;
665 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
666 &dma_handle, GFP_KERNEL | __GFP_ZERO);
667 if (!cpu_addr) {
668 dev_err(hdev->dev,
669 "Failed to allocate %zu of dma memory for TPC kernel\n",
670 fw_size);
671 rc = -ENOMEM;
672 goto out;
673 }
674
675 memcpy(cpu_addr, fw->data, fw_size);
676
677 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
678
679 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
680 dma_handle);
681
682out:
683 release_firmware(fw);
684 return rc;
685}
686
687static int gaudi_late_init(struct hl_device *hdev)
688{
689 struct gaudi_device *gaudi = hdev->asic_specific;
690 int rc;
691
692 rc = gaudi->armcp_info_get(hdev);
693 if (rc) {
694 dev_err(hdev->dev, "Failed to get armcp info\n");
695 return rc;
696 }
697
698 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
699 if (rc) {
700 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
701 return rc;
702 }
703
704 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
705
706 gaudi_fetch_psoc_frequency(hdev);
707
708 rc = gaudi_mmu_clear_pgt_range(hdev);
709 if (rc) {
710 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
711 goto disable_pci_access;
712 }
713
714 rc = gaudi_init_tpc_mem(hdev);
715 if (rc) {
716 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
717 goto disable_pci_access;
718 }
719
720 return 0;
721
722disable_pci_access:
723 hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
724
725 return rc;
726}
727
728static void gaudi_late_fini(struct hl_device *hdev)
729{
730 const struct hwmon_channel_info **channel_info_arr;
731 int i = 0;
732
733 if (!hdev->hl_chip_info->info)
734 return;
735
736 channel_info_arr = hdev->hl_chip_info->info;
737
738 while (channel_info_arr[i]) {
739 kfree(channel_info_arr[i]->config);
740 kfree(channel_info_arr[i]);
741 i++;
742 }
743
744 kfree(channel_info_arr);
745
746 hdev->hl_chip_info->info = NULL;
747}
748
749static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
750{
751 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
752 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
753 int i, j, rc = 0;
754
755 /*
 756	 * The device CPU works with 40-bit addresses, while bit 39 must be set
 757	 * to '1' when accessing the host.
 758	 * Bits 49:39 of the full host address are saved for a later
 759	 * configuration of the HW to perform extension to 50 bits.
 760	 * Because there is a single HW register that holds the extension bits,
 761	 * these bits must be identical across the whole allocated range.
762 */
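	/*
	 * A concrete illustration of the constraint above: since bits 49:39
	 * are programmed once for the whole window, an allocation that
	 * happens to straddle a 2^39 (512GB) boundary would see different
	 * values of those bits at its start and end. The loop below therefore
	 * retries the allocation up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times
	 * until start and end share the same MSBs, as checked with
	 * GAUDI_CPU_PCI_MSB_ADDR().
	 */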
763
764 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
765 virt_addr_arr[i] =
766 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
767 HL_CPU_ACCESSIBLE_MEM_SIZE,
768 &dma_addr_arr[i],
769 GFP_KERNEL | __GFP_ZERO);
770 if (!virt_addr_arr[i]) {
771 rc = -ENOMEM;
772 goto free_dma_mem_arr;
773 }
774
775 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
776 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
777 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
778 break;
779 }
780
781 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
782 dev_err(hdev->dev,
783 "MSB of CPU accessible DMA memory are not identical in all range\n");
784 rc = -EFAULT;
785 goto free_dma_mem_arr;
786 }
787
788 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
789 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
790 hdev->cpu_pci_msb_addr =
791 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
792
793 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
794
795free_dma_mem_arr:
796 for (j = 0 ; j < i ; j++)
797 hdev->asic_funcs->asic_dma_free_coherent(hdev,
798 HL_CPU_ACCESSIBLE_MEM_SIZE,
799 virt_addr_arr[j],
800 dma_addr_arr[j]);
801
802 return rc;
803}
804
805static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
806{
807 struct gaudi_device *gaudi = hdev->asic_specific;
808 struct gaudi_internal_qman_info *q;
809 u32 i;
810
811 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
812 q = &gaudi->internal_qmans[i];
813 if (!q->pq_kernel_addr)
814 continue;
815 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
816 q->pq_kernel_addr,
817 q->pq_dma_addr);
818 }
819}
820
821static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
822{
823 struct gaudi_device *gaudi = hdev->asic_specific;
824 struct gaudi_internal_qman_info *q;
825 int rc, i;
826
827 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
828 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
829 continue;
830
831 q = &gaudi->internal_qmans[i];
832
833 switch (i) {
834 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
835 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
836 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
837 break;
838 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
839 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
840 break;
841 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
842 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
843 break;
844 default:
845 dev_err(hdev->dev, "Bad internal queue index %d", i);
846 rc = -EINVAL;
847 goto free_internal_qmans_pq_mem;
848 }
849
850 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
851 hdev, q->pq_size,
852 &q->pq_dma_addr,
853 GFP_KERNEL | __GFP_ZERO);
854 if (!q->pq_kernel_addr) {
855 rc = -ENOMEM;
856 goto free_internal_qmans_pq_mem;
857 }
858 }
859
860 return 0;
861
862free_internal_qmans_pq_mem:
863 gaudi_free_internal_qmans_pq_mem(hdev);
864 return rc;
865}
866
867static int gaudi_sw_init(struct hl_device *hdev)
868{
869 struct gaudi_device *gaudi;
 870	u32 i, event_id = 0;
 871	int rc;
872
873 /* Allocate device structure */
874 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
875 if (!gaudi)
876 return -ENOMEM;
877
 878	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
879 if (gaudi_irq_map_table[i].valid) {
880 if (event_id == GAUDI_EVENT_SIZE) {
881 dev_err(hdev->dev,
882 "Event array exceeds the limit of %u events\n",
883 GAUDI_EVENT_SIZE);
884 rc = -EINVAL;
885 goto free_gaudi_device;
886 }
887
888 gaudi->events[event_id++] =
889 gaudi_irq_map_table[i].fc_id;
890 }
891 }
892
 893	gaudi->armcp_info_get = gaudi_armcp_info_get;
894
895 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
896
897 hdev->asic_specific = gaudi;
898
899 /* Create DMA pool for small allocations */
900 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
901 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
902 if (!hdev->dma_pool) {
903 dev_err(hdev->dev, "failed to create DMA pool\n");
904 rc = -ENOMEM;
905 goto free_gaudi_device;
906 }
907
908 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
909 if (rc)
910 goto free_dma_pool;
911
912 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
913 if (!hdev->cpu_accessible_dma_pool) {
914 dev_err(hdev->dev,
915 "Failed to create CPU accessible DMA pool\n");
916 rc = -ENOMEM;
917 goto free_cpu_dma_mem;
918 }
919
920 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
921 (uintptr_t) hdev->cpu_accessible_dma_mem,
922 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
923 if (rc) {
924 dev_err(hdev->dev,
925 "Failed to add memory to CPU accessible DMA pool\n");
926 rc = -EFAULT;
927 goto free_cpu_accessible_dma_pool;
928 }
929
930 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
931 if (rc)
932 goto free_cpu_accessible_dma_pool;
933
934 spin_lock_init(&gaudi->hw_queues_lock);
935 mutex_init(&gaudi->clk_gate_mutex);
936
937 hdev->supports_sync_stream = true;
938 hdev->supports_coresight = true;
939
940 return 0;
941
942free_cpu_accessible_dma_pool:
943 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
944free_cpu_dma_mem:
945 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
946 hdev->cpu_pci_msb_addr);
947 hdev->asic_funcs->asic_dma_free_coherent(hdev,
948 HL_CPU_ACCESSIBLE_MEM_SIZE,
949 hdev->cpu_accessible_dma_mem,
950 hdev->cpu_accessible_dma_address);
951free_dma_pool:
952 dma_pool_destroy(hdev->dma_pool);
953free_gaudi_device:
954 kfree(gaudi);
955 return rc;
956}
957
958static int gaudi_sw_fini(struct hl_device *hdev)
959{
960 struct gaudi_device *gaudi = hdev->asic_specific;
961
962 gaudi_free_internal_qmans_pq_mem(hdev);
963
964 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
965
966 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
967 hdev->cpu_pci_msb_addr);
968 hdev->asic_funcs->asic_dma_free_coherent(hdev,
969 HL_CPU_ACCESSIBLE_MEM_SIZE,
970 hdev->cpu_accessible_dma_mem,
971 hdev->cpu_accessible_dma_address);
972
973 dma_pool_destroy(hdev->dma_pool);
974
975 mutex_destroy(&gaudi->clk_gate_mutex);
976
977 kfree(gaudi);
978
979 return 0;
980}
981
982static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
983{
984 struct hl_device *hdev = arg;
985 int i;
986
987 if (hdev->disabled)
988 return IRQ_HANDLED;
989
990 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
991 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
992
993 hl_irq_handler_eq(irq, &hdev->event_queue);
994
995 return IRQ_HANDLED;
996}
997
998/*
999 * For backward compatibility, new MSI interrupts should be set after the
1000 * existing CPU and NIC interrupts.
1001 */
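/*
 * Resulting vector layout (a sketch derived from the code below): indices
 * below GAUDI_EVENT_QUEUE_MSI_IDX and the CPU event queue map 1:1 to MSI
 * vectors, while any newer interrupt index nr is shifted past the NIC block
 * to vector (nr + NIC_NUMBER_OF_ENGINES + 1).
 */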
1002static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1003 bool cpu_eq)
1004{
1005 int msi_vec;
1006
1007 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1008 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1009 GAUDI_EVENT_QUEUE_MSI_IDX);
1010
1011 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1012 (nr + NIC_NUMBER_OF_ENGINES + 1);
1013
1014 return pci_irq_vector(hdev->pdev, msi_vec);
1015}
1016
1017static int gaudi_enable_msi_single(struct hl_device *hdev)
1018{
1019 int rc, irq;
1020
1021 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1022
1023 irq = gaudi_pci_irq_vector(hdev, 0, false);
1024 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1025 "gaudi single msi", hdev);
1026 if (rc)
1027 dev_err(hdev->dev,
1028 "Failed to request single MSI IRQ\n");
1029
1030 return rc;
1031}
1032
1033static int gaudi_enable_msi_multi(struct hl_device *hdev)
1034{
1035 int cq_cnt = hdev->asic_prop.completion_queues_count;
1036 int rc, i, irq_cnt_init, irq;
1037
1038 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1039 irq = gaudi_pci_irq_vector(hdev, i, false);
1040 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1041 &hdev->completion_queue[i]);
1042 if (rc) {
1043 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1044 goto free_irqs;
1045 }
1046 }
1047
1048 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1049 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1050 &hdev->event_queue);
1051 if (rc) {
1052 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1053 goto free_irqs;
1054 }
1055
1056 return 0;
1057
1058free_irqs:
1059 for (i = 0 ; i < irq_cnt_init ; i++)
1060 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1061 &hdev->completion_queue[i]);
1062 return rc;
1063}
1064
1065static int gaudi_enable_msi(struct hl_device *hdev)
1066{
1067 struct gaudi_device *gaudi = hdev->asic_specific;
1068 int rc;
1069
1070 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1071 return 0;
1072
1073 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1074 PCI_IRQ_MSI);
1075 if (rc < 0) {
1076 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1077 return rc;
1078 }
1079
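	/*
	 * pci_alloc_irq_vectors() returns the number of vectors it actually
	 * allocated; a partial allocation (fewer than NUMBER_OF_INTERRUPTS)
	 * forces the fallback to single-MSI mode below.
	 */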
1080 if (rc < NUMBER_OF_INTERRUPTS) {
1081 gaudi->multi_msi_mode = false;
1082 rc = gaudi_enable_msi_single(hdev);
1083 } else {
1084 gaudi->multi_msi_mode = true;
1085 rc = gaudi_enable_msi_multi(hdev);
1086 }
1087
1088 if (rc)
1089 goto free_pci_irq_vectors;
1090
1091 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1092
1093 return 0;
1094
1095free_pci_irq_vectors:
1096 pci_free_irq_vectors(hdev->pdev);
1097 return rc;
1098}
1099
1100static void gaudi_sync_irqs(struct hl_device *hdev)
1101{
1102 struct gaudi_device *gaudi = hdev->asic_specific;
1103 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1104
1105 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1106 return;
1107
1108 /* Wait for all pending IRQs to be finished */
1109 if (gaudi->multi_msi_mode) {
1110 for (i = 0 ; i < cq_cnt ; i++)
1111 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1112
1113 synchronize_irq(gaudi_pci_irq_vector(hdev,
1114 GAUDI_EVENT_QUEUE_MSI_IDX,
1115 true));
1116 } else {
1117 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1118 }
1119}
1120
1121static void gaudi_disable_msi(struct hl_device *hdev)
1122{
1123 struct gaudi_device *gaudi = hdev->asic_specific;
1124 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1125
1126 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1127 return;
1128
1129 gaudi_sync_irqs(hdev);
1130
1131 if (gaudi->multi_msi_mode) {
1132 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1133 true);
1134 free_irq(irq, &hdev->event_queue);
1135
1136 for (i = 0 ; i < cq_cnt ; i++) {
1137 irq = gaudi_pci_irq_vector(hdev, i, false);
1138 free_irq(irq, &hdev->completion_queue[i]);
1139 }
1140 } else {
1141 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1142 }
1143
1144 pci_free_irq_vectors(hdev->pdev);
1145
1146 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1147}
1148
1149static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1150{
1151 struct gaudi_device *gaudi = hdev->asic_specific;
1152
1153 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1154 return;
1155
1156 if (!hdev->sram_scrambler_enable)
1157 return;
1158
1159 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1161 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1162 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1163 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1164 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1165 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1166 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1167 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1168 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1169 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1170 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1171 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1172 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1173 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1174 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1175
1176 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1177 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1178 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1179 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1180 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1181 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1182 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1183 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1184 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1185 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1186 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1187 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1188 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1189 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1190 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1191 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1192
1193 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1194 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1195 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1196 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1197 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1198 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1199 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1200 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1201 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1202 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1203 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1204 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1205 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1206 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1207 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1208 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1209
1210 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1211}
1212
1213static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1214{
1215 struct gaudi_device *gaudi = hdev->asic_specific;
1216
1217 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1218 return;
1219
1220 if (!hdev->dram_scrambler_enable)
1221 return;
1222
1223 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1224 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1225 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1226 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1227 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1228 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1229 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1230 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1231 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1232 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1233 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1234 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1235 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1236 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1237 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1238 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1239
1240 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1241 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1242 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1243 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1244 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1245 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1246 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1247 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1248 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1249 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1250 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1251 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1252 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1253 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1254 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1255 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1256
1257 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1258 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1259 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1260 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1261 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1262 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1263 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1264 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1265 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1266 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1267 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1268 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1269 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1270 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1271 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1272 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1273
1274 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1275}
1276
1277static void gaudi_init_e2e(struct hl_device *hdev)
1278{
1279 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1280 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1281 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1282 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1283
1284 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1285 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1286 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1287 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1288
1289 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1290 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1291 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1292 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1293
1294 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1295 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1296 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1297 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1298
1299 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1300 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1301 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1302 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1303
1304 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1305 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1306 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1307 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1308
1309 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1310 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1311 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1312 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1313
1314 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1315 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1316 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1317 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1318
1319 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1320 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1321 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1322 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1323
1324 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1325 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1326 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1327 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1328
1329 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1330 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1331 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1332 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1333
1334 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1335 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1336 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1337 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1338
1339 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1340 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1341 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1342 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1343
1344 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1345 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1346 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1347 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1348
1349 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1350 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1351 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1352 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1353
1354 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1355 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1356 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1357 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1358
1359 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1360 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1361 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1362 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1363
1364 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1365 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1366 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1367 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1368
1369 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1370 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1371 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1372 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1373
1374 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1375 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1376 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1377 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1378
1379 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1380 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1381 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1382 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1383
1384 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1385 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1386 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1387 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1388
1389 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1390 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1391 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1392 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1393
1394 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1395 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1396 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1397 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1398
1399 if (!hdev->dram_scrambler_enable) {
1400 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1401 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1402 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1403 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1404
1405 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1406 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1407 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1408 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1409
1410 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1411 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1412 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1413 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1414
1415 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1416 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1417 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1418 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1419
1420 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1421 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1422 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1423 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1424
1425 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1426 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1427 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1428 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1429
1430 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1431 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1432 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1433 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1434
1435 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1436 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1437 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1438 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1439
1440 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1441 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1442 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1443 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1444
1445 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1446 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1447 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1448 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1449
1450 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1451 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1452 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1453 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1454
1455 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1456 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1457 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1458 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1459
1460 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1461 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1462 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1463 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1464
1465 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1466 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1467 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1468 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1469
1470 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1471 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1472 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1473 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1474
1475 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1476 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1477 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1478 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1479
1480 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1481 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1482 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1483 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1484
1485 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1486 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1487 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1488 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1489
1490 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1491 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1492 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1493 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1494
1495 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1496 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1497 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1498 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1499
1500 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1501 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1502 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1503 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1504
1505 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1506 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1507 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1508 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1509
1510 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1511 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1512 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1513 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1514
1515 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1516 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1517 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1518 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1519 }
1520
1521 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1522 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1523 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1524 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1525
1526 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1527 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1528 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1529 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1530
1531 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1532 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1533 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1534 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1535
1536 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1537 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1538 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1539 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1540
1541 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1542 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1543 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1544 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1545
1546 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1547 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1548 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1549 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1550
1551 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1552 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1553 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1554 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1555
1556 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1557 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1558 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1559 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1560
1561 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1562 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1563 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1564 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1565
1566 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1567 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1568 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1569 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1570
1571 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1572 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1573 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1574 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1575
1576 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1577 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1578 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1579 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1580
1581 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1582 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1583 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1584 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1585
1586 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1587 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1588 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1589 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1590
1591 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1592 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1593 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1594 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1595
1596 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1597 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1598 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1599 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1600
1601 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1602 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1603 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1604 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1605
1606 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1607 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1608 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1609 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1610
1611 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1612 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1613 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1614 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1615
1616 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1617 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1618 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1619 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1620
1621 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1622 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1623 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1624 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1625
1626 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1627 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1628 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1629 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1630
1631 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1632 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1633 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1634 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1635
1636 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1637 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1638 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1639 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1640}
1641
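/*
 * gaudi_init_hbm_cred() - program the HBM0/HBM1 read and write credit
 * counters of the four DMA_IF units (E_N, E_S, W_N, W_S) and enable
 * read/write credit accounting in both HBM credit-enable registers of
 * each unit. The credit values themselves are hard-coded constants.
 */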
1642static void gaudi_init_hbm_cred(struct hl_device *hdev)
1643{
1644 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1645
1646 hbm0_wr = 0x33333333;
1647 hbm1_wr = 0x33333333;
1648 hbm0_rd = 0x77777777;
1649 hbm1_rd = 0xDDDDDDDD;
1650
1651 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1652 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1653 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1654 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1655
1656 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1657 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1658 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1659 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1660
1661 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1662 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1663 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1664 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1665
1666 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1667 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1668 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1669 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1670
1671 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1672 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1673 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1674 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1675 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1676 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1677 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1678 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1679 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1680 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1681 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1682 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1683
1684 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1685 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1686 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1687 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1688 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1689 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1690 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1691 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1692 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1693 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1694 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1695 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1696}
1697
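/*
 * gaudi_init_rate_limiter() - derive the HBM frequency from the PSOC HBM
 * PLL registers (freq = 50 * (NF + 1) / ((NR + 1) * (OD + 1)) in MHz,
 * assuming a 50MHz reference clock), program the downstream response
 * weights and, if rate limiting is enabled, set the saturation, reset and
 * timeout values of the HBM and SRAM rate limiters of every DMA_IF channel.
 */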
1698static void gaudi_init_rate_limiter(struct hl_device *hdev)
1699{
1700 u32 nr, nf, od, sat, rst, timeout;
1701 u64 freq;
1702
1703 nr = RREG32(mmPSOC_HBM_PLL_NR);
1704 nf = RREG32(mmPSOC_HBM_PLL_NF);
1705 od = RREG32(mmPSOC_HBM_PLL_OD);
1706 freq = (50 * (nf + 1)) / ((nr + 1) * (od + 1));
1707
1708 dev_dbg(hdev->dev, "HBM frequency is %lluMHz\n", freq);
1709
1710 /* Configuration is for five DDMA channels */
1711 if (freq == 800) {
1712 sat = 4;
1713 rst = 11;
1714 timeout = 15;
1715 } else if (freq == 900) {
1716 sat = 4;
1717 rst = 15;
1718 timeout = 16;
1719 } else if (freq == 950) {
1720 sat = 4;
1721 rst = 15;
1722 timeout = 15;
1723 } else {
1724 dev_warn(hdev->dev,
1725 "unsupported HBM frequency %lluMHz, no rate-limiters\n",
1726 freq);
1727 return;
1728 }
1729
1730 WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_0, 0x111);
1731 WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_1, 0x111);
1732 WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_0, 0x111);
1733 WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_1, 0x111);
1734 WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_0, 0x111);
1735 WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_1, 0x111);
1736 WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_0, 0x111);
1737 WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_1, 0x111);
1738
1739 if (!hdev->rl_enable) {
1740 dev_info(hdev->dev, "Rate limiters disabled\n");
1741 return;
1742 }
1743
1744 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_SAT, sat);
1745 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_SAT, sat);
1746 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_SAT, sat);
1747 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_SAT, sat);
1748 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_SAT, sat);
1749 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_SAT, sat);
1750 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_SAT, sat);
1751 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_SAT, sat);
1752
1753 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_RST, rst);
1754 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_RST, rst);
1755 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_RST, rst);
1756 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_RST, rst);
1757 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_RST, rst);
1758 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_RST, rst);
1759 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_RST, rst);
1760 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_RST, rst);
1761
1762 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1763 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1764 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1765 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1766 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1767 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1768 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1769 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1770
1771 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_EN, 1);
1772 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_EN, 1);
1773 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_EN, 1);
1774 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_EN, 1);
1775 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_EN, 1);
1776 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_EN, 1);
1777 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_EN, 1);
1778 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_EN, 1);
1779
1780 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_SAT, sat);
1781 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_SAT, sat);
1782 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_SAT, sat);
1783 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_SAT, sat);
1784 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_SAT, sat);
1785 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_SAT, sat);
1786 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_SAT, sat);
1787 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_SAT, sat);
1788
1789 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_RST, rst);
1790 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_RST, rst);
1791 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_RST, rst);
1792 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_RST, rst);
1793 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_RST, rst);
1794 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_RST, rst);
1795 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_RST, rst);
1796 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_RST, rst);
1797
1798 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1799 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1800 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1801 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1802 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1803 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1804 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1805 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1806
1807 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_EN, 1);
1808 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_EN, 1);
1809 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_EN, 1);
1810 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_EN, 1);
1811 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_EN, 1);
1812 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_EN, 1);
1813 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_EN, 1);
1814 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_EN, 1);
1815}
1816
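/*
 * gaudi_init_golden_registers() - apply the one-time register configuration:
 * E2E and HBM credits, rate limiters, TPC interrupt masking and I-cache
 * fetch width, zeroing of the first SRAM bytes for tensor DMA, MME rollup
 * counters and the H3-2081 PCIe workaround. Clock gating is disabled
 * before the per-engine writes.
 */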
1817static void gaudi_init_golden_registers(struct hl_device *hdev)
1818{
1819 u32 tpc_offset;
1820 int tpc_id, i;
1821
1822 gaudi_init_e2e(hdev);
1823
1824 gaudi_init_hbm_cred(hdev);
1825
1826 gaudi_init_rate_limiter(hdev);
1827
Oded Gabbaye38bfd32020-07-03 20:46:12 +03001828 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001829
1830 for (tpc_id = 0, tpc_offset = 0;
1831 tpc_id < TPC_NUMBER_OF_ENGINES;
1832 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1833 /* Mask all arithmetic interrupts from TPC */
1834 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1835 /* Set 16 cache lines */
1836 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1837 ICACHE_FETCH_LINE_NUM, 2);
1838 }
1839
1840 /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
1841 for (i = 0 ; i < 128 ; i += 8)
1842 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1843
1844 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1845 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1846 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1847 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1848
1849 /* WA for H3-2081 */
1850 WREG32(mmPCIE_WRAP_MAX_OUTSTAND, 0x10ff);
1851}
1852
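/*
 * gaudi_init_pci_dma_qman() - configure one stream (qman_id) of a PCI DMA
 * QMAN: point the PQ at the host buffer in qman_pq_addr, set the PQ size
 * and reset its PI/CI, and program the LDMA offsets and the sync manager
 * message base addresses. For stream 0 only, the RAZWI error reporting,
 * arbiter watchdog and QMAN protection bits are configured as well.
 */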
1853static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1854 int qman_id, dma_addr_t qman_pq_addr)
1855{
1856 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1857 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1858 u32 q_off, dma_qm_offset;
1859 u32 dma_qm_err_cfg;
1860
1861 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1862
1863 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1864 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1865 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1866 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1867 so_base_en_lo = lower_32_bits(CFG_BASE +
1868 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1869 so_base_en_hi = upper_32_bits(CFG_BASE +
1870 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1871 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1872 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1873 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1874 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1875 so_base_ws_lo = lower_32_bits(CFG_BASE +
1876 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1877 so_base_ws_hi = upper_32_bits(CFG_BASE +
1878 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1879
1880 q_off = dma_qm_offset + qman_id * 4;
1881
1882 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1883 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1884
1885 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1886 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1887 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1888
1889 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1890 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1891 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1892
1893 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1894 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1895 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1896 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1897 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1898 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1899 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1900 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1901
Omer Shpigelmance043262020-06-16 17:56:27 +03001902 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1903
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001904 /* The following configuration is needed only once per QMAN */
1905 if (qman_id == 0) {
1906 /* Configure RAZWI IRQ */
1907 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1908 if (hdev->stop_on_err) {
1909 dma_qm_err_cfg |=
1910 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1911 }
1912
1913 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1914 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1915 lower_32_bits(CFG_BASE +
1916 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1917 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1918 upper_32_bits(CFG_BASE +
1919 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1920 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1921 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1922 dma_id);
1923
1924 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1925 QM_ARB_ERR_MSG_EN_MASK);
1926
1927 /* Increase ARB WDT to support streams architecture */
1928 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1929 GAUDI_ARB_WDT_TIMEOUT);
1930
1931 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1932 QMAN_EXTERNAL_MAKE_TRUSTED);
1933
1934 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1935 }
1936}
1937
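/*
 * gaudi_init_dma_core() - configure a DMA core engine: set the read
 * outstanding/size limits to their maximum, configure RAZWI error
 * reporting (optionally stopping on error), put the secured channel in
 * MMU bypass mode and enable the engine.
 */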
1938static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1939{
1940 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1941 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1942
1943 /* Set to maximum possible according to physical size */
1944 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1945 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1946
1947 /* With STOP_ON set, an operation that hits a RAZWI gets no completion */
1948 if (hdev->stop_on_err)
1949 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1950
1951 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1952 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1953 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1954 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1955 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1956 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1957 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1958 WREG32(mmDMA0_CORE_PROT + dma_offset,
1959 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1960 /* If the channel is secured, it should be in MMU bypass mode */
1961 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1962 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1963 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1964}
1965
1966static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1967 u32 enable_mask)
1968{
1969 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1970
1971 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1972}
1973
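/*
 * gaudi_init_pci_dma_qmans() - initialize all PCI DMA channels: assign a CQ
 * and an MSI vector to each of the four streams of every channel (skipping
 * the CPU queue and the NIC IRQs where needed), program the streams with
 * the kernel queues' bus addresses, configure the DMA cores and enable the
 * QMANs. Guarded by HW_CAP_PCI_DMA so it runs only once.
 */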
1974static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1975{
1976 struct gaudi_device *gaudi = hdev->asic_specific;
1977 struct hl_hw_queue *q;
1978 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1979
1980 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1981 return;
1982
1983 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1984 dma_id = gaudi_dma_assignment[i];
1985 /*
1986 * For queues after the CPU Q, we need to add 1 to get the
1987 * correct queue. In addition, we need to add the CPU EQ and
1988 * NIC IRQs in order to get the correct MSI register.
1989 */
1990 if (dma_id > 1) {
1991 cpu_skip = 1;
1992 nic_skip = NIC_NUMBER_OF_ENGINES;
1993 } else {
1994 cpu_skip = 0;
1995 nic_skip = 0;
1996 }
1997
1998 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1999 q_idx = 4 * dma_id + j + cpu_skip;
2000 q = &hdev->kernel_queues[q_idx];
2001 q->cq_id = cq_id++;
2002 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2003 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2004 q->bus_address);
2005 }
2006
2007 gaudi_init_dma_core(hdev, dma_id);
2008
2009 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2010 }
2011
2012 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2013}
2014
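/*
 * gaudi_init_hbm_dma_qman() - configure one stream of an HBM DMA QMAN.
 * Streams 0-3 get their PQ at qman_base_addr; stream 4 is the lower CP,
 * for which the RAZWI error reporting, arbiter watchdog and protection
 * bits are programmed once per QMAN.
 */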
2015static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2016 int qman_id, u64 qman_base_addr)
2017{
2018 u32 mtr_base_lo, mtr_base_hi;
2019 u32 so_base_lo, so_base_hi;
2020 u32 q_off, dma_qm_offset;
2021 u32 dma_qm_err_cfg;
2022
2023 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2024
2025 mtr_base_lo = lower_32_bits(CFG_BASE +
2026 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2027 mtr_base_hi = upper_32_bits(CFG_BASE +
2028 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2029 so_base_lo = lower_32_bits(CFG_BASE +
2030 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2031 so_base_hi = upper_32_bits(CFG_BASE +
2032 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2033
2034 q_off = dma_qm_offset + qman_id * 4;
2035
2036 if (qman_id < 4) {
2037 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2038 lower_32_bits(qman_base_addr));
2039 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2040 upper_32_bits(qman_base_addr));
2041
2042 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2043 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2044 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2045
2046 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2047 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2048 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2049 } else {
2050 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2051 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2052 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2053
2054 /* Configure RAZWI IRQ */
2055 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2056 if (hdev->stop_on_err) {
2057 dma_qm_err_cfg |=
2058 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2059 }
2060 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2061
2062 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2063 lower_32_bits(CFG_BASE +
2064 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2065 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2066 upper_32_bits(CFG_BASE +
2067 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2068 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2069 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2070 dma_id);
2071
2072 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2073 QM_ARB_ERR_MSG_EN_MASK);
2074
2075 /* Increase ARB WDT to support streams architecture */
2076 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2077 GAUDI_ARB_WDT_TIMEOUT);
2078
2079 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2080 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2081 QMAN_INTERNAL_MAKE_TRUSTED);
2082 }
2083
2084 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2085 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2086 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2087 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2088}
2089
2090static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2091{
2092 struct gaudi_device *gaudi = hdev->asic_specific;
2093 struct gaudi_internal_qman_info *q;
2094 u64 qman_base_addr;
2095 int i, j, dma_id, internal_q_index;
2096
2097 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2098 return;
2099
2100 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2101 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2102
2103 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2104 /*
2105 * Add the CPU queue in order to get the correct queue
2106 * number, as all internal queues are placed after it
2107 */
2108 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2109
2110 q = &gaudi->internal_qmans[internal_q_index];
2111 qman_base_addr = (u64) q->pq_dma_addr;
2112 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2113 qman_base_addr);
2114 }
2115
2116 /* Initializing lower CP for HBM DMA QMAN */
2117 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2118
2119 gaudi_init_dma_core(hdev, dma_id);
2120
2121 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2122 }
2123
2124 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2125}
2126
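/*
 * gaudi_init_mme_qman() - configure one stream of an MME QMAN at the given
 * register offset. Streams 0-3 get a PQ at qman_base_addr; stream 4 is the
 * lower CP, for which the RAZWI error reporting, arbiter watchdog and
 * protection bits are programmed once per QMAN.
 */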
2127static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2128 int qman_id, u64 qman_base_addr)
2129{
2130 u32 mtr_base_lo, mtr_base_hi;
2131 u32 so_base_lo, so_base_hi;
2132 u32 q_off, mme_id;
2133 u32 mme_qm_err_cfg;
2134
2135 mtr_base_lo = lower_32_bits(CFG_BASE +
2136 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2137 mtr_base_hi = upper_32_bits(CFG_BASE +
2138 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2139 so_base_lo = lower_32_bits(CFG_BASE +
2140 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2141 so_base_hi = upper_32_bits(CFG_BASE +
2142 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2143
2144 q_off = mme_offset + qman_id * 4;
2145
2146 if (qman_id < 4) {
2147 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2148 lower_32_bits(qman_base_addr));
2149 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2150 upper_32_bits(qman_base_addr));
2151
2152 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2153 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2154 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2155
2156 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2157 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2158 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2159 } else {
2160 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2161 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2162 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2163
2164 /* Configure RAZWI IRQ */
2165 mme_id = mme_offset /
2166 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2167
2168 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2169 if (hdev->stop_on_err) {
2170 mme_qm_err_cfg |=
2171 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2172 }
2173 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2174 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2175 lower_32_bits(CFG_BASE +
2176 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2177 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2178 upper_32_bits(CFG_BASE +
2179 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2180 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2181 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2182 mme_id);
2183
2184 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2185 QM_ARB_ERR_MSG_EN_MASK);
2186
2187 /* Increase ARB WDT to support streams architecture */
2188 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2189 GAUDI_ARB_WDT_TIMEOUT);
2190
2191 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2192 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2193 QMAN_INTERNAL_MAKE_TRUSTED);
2194 }
2195
2196 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2197 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2198 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2199 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2200}
2201
2202static void gaudi_init_mme_qmans(struct hl_device *hdev)
2203{
2204 struct gaudi_device *gaudi = hdev->asic_specific;
2205 struct gaudi_internal_qman_info *q;
2206 u64 qman_base_addr;
2207 u32 mme_offset;
2208 int i, internal_q_index;
2209
2210 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2211 return;
2212
2213 /*
2214 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2215 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2216 */
2217
2218 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2219
2220 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2221 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2222 q = &gaudi->internal_qmans[internal_q_index];
2223 qman_base_addr = (u64) q->pq_dma_addr;
2224 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2225 qman_base_addr);
2226 if (i == 3)
2227 mme_offset = 0;
2228 }
2229
2230 /* Initializing lower CP for MME QMANs */
2231 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2232 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2233 gaudi_init_mme_qman(hdev, 0, 4, 0);
2234
2235 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2236 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2237
2238 gaudi->hw_cap_initialized |= HW_CAP_MME;
2239}
2240
2241static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2242 int qman_id, u64 qman_base_addr)
2243{
2244 u32 mtr_base_lo, mtr_base_hi;
2245 u32 so_base_lo, so_base_hi;
2246 u32 q_off, tpc_id;
2247 u32 tpc_qm_err_cfg;
2248
2249 mtr_base_lo = lower_32_bits(CFG_BASE +
2250 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2251 mtr_base_hi = upper_32_bits(CFG_BASE +
2252 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2253 so_base_lo = lower_32_bits(CFG_BASE +
2254 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2255 so_base_hi = upper_32_bits(CFG_BASE +
2256 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2257
2258 q_off = tpc_offset + qman_id * 4;
2259
2260 if (qman_id < 4) {
2261 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2262 lower_32_bits(qman_base_addr));
2263 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2264 upper_32_bits(qman_base_addr));
2265
2266 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2267 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2268 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2269
2270 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2271 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2272 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2273 } else {
2274 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2275 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2276 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2277
2278 /* Configure RAZWI IRQ */
2279 tpc_id = tpc_offset /
2280 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2281
2282 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2283 if (hdev->stop_on_err) {
2284 tpc_qm_err_cfg |=
2285 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2286 }
2287
2288 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2289 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2290 lower_32_bits(CFG_BASE +
2291 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2292 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2293 upper_32_bits(CFG_BASE +
2294 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2295 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2296 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2297 tpc_id);
2298
2299 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2300 QM_ARB_ERR_MSG_EN_MASK);
2301
2302 /* Increase ARB WDT to support streams architecture */
2303 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2304 GAUDI_ARB_WDT_TIMEOUT);
2305
2306 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2307 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2308 QMAN_INTERNAL_MAKE_TRUSTED);
2309 }
2310
2311 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2312 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2313 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2314 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2315}
2316
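/*
 * gaudi_init_tpc_qmans() - initialize the QMANs of all TPC engines: program
 * the four upper-CP streams of each TPC with their internal queue PQ
 * addresses, initialize the lower CP, point TPCx_CFG_SM_BASE_ADDRESS_HIGH
 * at the E_N sync manager SOB block and enable each QMAN/TPC pair.
 */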
2317static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2318{
2319 struct gaudi_device *gaudi = hdev->asic_specific;
2320 struct gaudi_internal_qman_info *q;
2321 u64 qman_base_addr;
2322 u32 so_base_hi, tpc_offset = 0;
2323 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2324 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2325 int i, tpc_id, internal_q_index;
2326
2327 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2328 return;
2329
2330 so_base_hi = upper_32_bits(CFG_BASE +
2331 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2332
2333 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2334 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2335 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2336 tpc_id * QMAN_STREAMS + i;
2337 q = &gaudi->internal_qmans[internal_q_index];
2338 qman_base_addr = (u64) q->pq_dma_addr;
2339 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2340 qman_base_addr);
2341
2342 if (i == 3) {
2343 /* Initializing lower CP for TPC QMAN */
2344 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2345
2346 /* Enable the QMAN and TPC channel */
2347 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2348 QMAN_TPC_ENABLE);
2349 }
2350 }
2351
2352 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2353 so_base_hi);
2354
2355 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2356
2357 gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id);
2358 }
2359}
2360
2361static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2362{
2363 struct gaudi_device *gaudi = hdev->asic_specific;
2364
2365 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2366 return;
2367
2368 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2369 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2370 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2371}
2372
2373static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2374{
2375 struct gaudi_device *gaudi = hdev->asic_specific;
2376
2377 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2378 return;
2379
2380 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2381 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2382 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2383 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2384 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2385}
2386
2387static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2388{
2389 struct gaudi_device *gaudi = hdev->asic_specific;
2390
2391 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2392 return;
2393
2394 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2395 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2396}
2397
2398static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2399{
2400 struct gaudi_device *gaudi = hdev->asic_specific;
2401 u32 tpc_offset = 0;
2402 int tpc_id;
2403
2404 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2405 return;
2406
2407 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2408 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2409 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2410 }
2411}
2412
2413static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2414{
2415 struct gaudi_device *gaudi = hdev->asic_specific;
2416
2417 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2418 return;
2419
2420 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2421 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2422 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2423 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2424}
2425
2426static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2427{
2428 struct gaudi_device *gaudi = hdev->asic_specific;
2429
2430 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2431 return;
2432
2433 /* Stop CPs of HBM DMA QMANs */
2434
2435 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2436 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2437 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2438 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2439 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2440}
2441
2442static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2443{
2444 struct gaudi_device *gaudi = hdev->asic_specific;
2445
2446 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2447 return;
2448
2449 /* Stop CPs of MME QMANs */
2450 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2451 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2452}
2453
2454static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2455{
2456 struct gaudi_device *gaudi = hdev->asic_specific;
2457
2458 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2459 return;
2460
2461 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2462 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2463 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2464 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2465 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2466 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2467 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2468 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2469}
2470
2471static void gaudi_pci_dma_stall(struct hl_device *hdev)
2472{
2473 struct gaudi_device *gaudi = hdev->asic_specific;
2474
2475 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2476 return;
2477
2478 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2479 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2480 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2481}
2482
2483static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2484{
2485 struct gaudi_device *gaudi = hdev->asic_specific;
2486
2487 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2488 return;
2489
2490 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2491 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2492 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2493 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2494 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2495}
2496
2497static void gaudi_mme_stall(struct hl_device *hdev)
2498{
2499 struct gaudi_device *gaudi = hdev->asic_specific;
2500
2501 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2502 return;
2503
2504 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2505 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2506 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2507 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2508 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2509 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2510 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2511 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2512 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2513 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2514 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2515 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2516 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2517 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2518 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2519 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2520 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2521}
2522
2523static void gaudi_tpc_stall(struct hl_device *hdev)
2524{
2525 struct gaudi_device *gaudi = hdev->asic_specific;
2526
2527 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2528 return;
2529
2530 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2531 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2532 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2533 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2534 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2535 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2536 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2537 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2538}
2539
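/*
 * gaudi_set_clock_gating() - enable clock gating on the DMA, MME and TPC
 * QMANs according to hdev->clock_gating_mask (one bit per engine ID),
 * skipping everything while a debug session is active. PCI DMA QMANs use
 * the upper-CP gating profile; all other QMANs use the common-CP profile.
 */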
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002540static void gaudi_set_clock_gating(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002541{
2542 struct gaudi_device *gaudi = hdev->asic_specific;
2543 u32 qman_offset;
2544 int i;
2545
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002546 /* If we are in a debug session, don't enable clock gating
2547 * as it may interfere
2548 */
2549 if (hdev->in_debug)
2550 return;
2551
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002552 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2553 if (!(hdev->clock_gating_mask &
2554 (BIT_ULL(gaudi_dma_assignment[i]))))
2555 continue;
2556
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002557 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2558 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2559 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2560 QMAN_UPPER_CP_CGM_PWR_GATE_EN);
2561 }
2562
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002563 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2564 if (!(hdev->clock_gating_mask &
2565 (BIT_ULL(gaudi_dma_assignment[i]))))
2566 continue;
2567
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002568 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2569 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2570 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2571 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2572 }
2573
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002574 if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) {
2575 WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2576 WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2577 }
2578
2579 if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) {
2580 WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2581 WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2582 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002583
2584 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002585 if (!(hdev->clock_gating_mask &
2586 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))))
2587 continue;
2588
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002589 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2590 QMAN_CGM1_PWR_GATE_EN);
2591 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2592 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2593
2594 qman_offset += TPC_QMAN_OFFSET;
2595 }
2596
2597 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2598}
2599
2600static void gaudi_disable_clock_gating(struct hl_device *hdev)
2601{
2602 struct gaudi_device *gaudi = hdev->asic_specific;
2603 u32 qman_offset;
2604 int i;
2605
2606 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2607 return;
2608
2609 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2610 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2611 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2612
2613 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2614 }
2615
2616 WREG32(mmMME0_QM_CGM_CFG, 0);
2617 WREG32(mmMME0_QM_CGM_CFG1, 0);
2618 WREG32(mmMME2_QM_CGM_CFG, 0);
2619 WREG32(mmMME2_QM_CGM_CFG1, 0);
2620
2621 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2622 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2623 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2624
2625 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2626 }
2627
2628 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2629}
2630
2631static void gaudi_enable_timestamp(struct hl_device *hdev)
2632{
2633 /* Disable the timestamp counter */
2634 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2635
2636 /* Zero the lower/upper parts of the 64-bit counter */
2637 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2638 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2639
2640 /* Enable the counter */
2641 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2642}
2643
2644static void gaudi_disable_timestamp(struct hl_device *hdev)
2645{
2646 /* Disable the timestamp counter */
2647 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2648}
2649
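/*
 * gaudi_halt_engines() - quiesce the compute engines before a reset: stop
 * the QMAN CPs, disable clock gating, stall the DMA/TPC/MME engines and
 * then disable the QMANs and the timestamp counter. On a hard reset the
 * embedded CPU is also sent to WFE and MSI is disabled; otherwise the IRQs
 * are only synced.
 */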
2650static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2651{
2652 u32 wait_timeout_ms, cpu_timeout_ms;
2653
2654 dev_info(hdev->dev,
2655 "Halting compute engines and disabling interrupts\n");
2656
2657 if (hdev->pldm) {
2658 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2659 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2660 } else {
2661 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2662 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
2663 }
2664
2665 if (hard_reset) {
2666 /*
2667 * We don't know the state of the CPU, so make sure it is
2668 * stopped by any means necessary
2669 */
2670 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
2671 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
Ofir Bittonebd8d122020-05-10 13:41:28 +03002672 GAUDI_EVENT_HALT_MACHINE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002673 msleep(cpu_timeout_ms);
2674 }
2675
2676 gaudi_stop_mme_qmans(hdev);
2677 gaudi_stop_tpc_qmans(hdev);
2678 gaudi_stop_hbm_dma_qmans(hdev);
2679 gaudi_stop_pci_dma_qmans(hdev);
2680
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002681 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002682
2683 msleep(wait_timeout_ms);
2684
2685 gaudi_pci_dma_stall(hdev);
2686 gaudi_hbm_dma_stall(hdev);
2687 gaudi_tpc_stall(hdev);
2688 gaudi_mme_stall(hdev);
2689
2690 msleep(wait_timeout_ms);
2691
2692 gaudi_disable_mme_qmans(hdev);
2693 gaudi_disable_tpc_qmans(hdev);
2694 gaudi_disable_hbm_dma_qmans(hdev);
2695 gaudi_disable_pci_dma_qmans(hdev);
2696
2697 gaudi_disable_timestamp(hdev);
2698
2699 if (hard_reset)
2700 gaudi_disable_msi(hdev);
2701 else
2702 gaudi_sync_irqs(hdev);
2703}
2704
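/*
 * gaudi_mmu_init() - set the hop-0 page table address for every ASID,
 * program the MMU cache management page, invalidate the MMU cache and
 * enable the MMU with the configured hop/huge-page settings.
 */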
2705static int gaudi_mmu_init(struct hl_device *hdev)
2706{
2707 struct asic_fixed_properties *prop = &hdev->asic_prop;
2708 struct gaudi_device *gaudi = hdev->asic_specific;
2709 u64 hop0_addr;
2710 int rc, i;
2711
2712 if (!hdev->mmu_enable)
2713 return 0;
2714
2715 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2716 return 0;
2717
2718 hdev->dram_supports_virtual_memory = false;
2719
2720 for (i = 0 ; i < prop->max_asid ; i++) {
2721 hop0_addr = prop->mmu_pgt_addr +
2722 (i * prop->mmu_hop_table_size);
2723
2724 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2725 if (rc) {
2726 dev_err(hdev->dev,
2727 "failed to set hop0 addr for asid %d\n", i);
2728 goto err;
2729 }
2730 }
2731
2732 /* init MMU cache manage page */
2733 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2734 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2735
2736 hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2737 VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2738
2739 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2740 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2741
2742 WREG32(mmSTLB_HOP_CONFIGURATION,
2743 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2744
Omer Shpigelmancfd41762020-06-03 13:03:35 +03002745 /*
2746 * The H/W expects the first PI after init to be 1. After wraparound
2747 * we'll write 0.
2748 */
2749 gaudi->mmu_cache_inv_pi = 1;
2750
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002751 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2752
2753 return 0;
2754
2755err:
2756 return rc;
2757}
2758
2759static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2760{
2761 void __iomem *dst;
2762
2763 /* HBM scrambler must be initialized before pushing F/W to HBM */
2764 gaudi_init_scrambler_hbm(hdev);
2765
2766 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2767
2768 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2769}
2770
2771static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2772{
2773 void __iomem *dst;
2774
2775 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2776
2777 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2778}
2779
2780static void gaudi_read_device_fw_version(struct hl_device *hdev,
2781 enum hl_fw_component fwc)
2782{
2783 const char *name;
2784 u32 ver_off;
2785 char *dest;
2786
2787 switch (fwc) {
2788 case FW_COMP_UBOOT:
2789 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2790 dest = hdev->asic_prop.uboot_ver;
2791 name = "U-Boot";
2792 break;
2793 case FW_COMP_PREBOOT:
2794 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2795 dest = hdev->asic_prop.preboot_ver;
2796 name = "Preboot";
2797 break;
2798 default:
2799 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2800 return;
2801 }
2802
2803 ver_off &= ~((u32)SRAM_BASE_ADDR);
2804
2805 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2806 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2807 VERSION_MAX_LEN);
2808 } else {
2809 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2810 name, ver_off);
2811 strcpy(dest, "unavailable");
2812 }
2813}
2814
2815static int gaudi_init_cpu(struct hl_device *hdev)
2816{
2817 struct gaudi_device *gaudi = hdev->asic_specific;
2818 int rc;
2819
2820 if (!hdev->cpu_enable)
2821 return 0;
2822
2823 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2824 return 0;
2825
2826 /*
2827 * The device CPU works with 40-bit addresses.
2828 * This register sets the extension to 50 bits.
2829 */
2830 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2831
2832 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2833 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2834 mmCPU_CMD_STATUS_TO_HOST,
2835 mmCPU_BOOT_ERR0,
2836 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2837 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2838
2839 if (rc)
2840 return rc;
2841
2842 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2843
2844 return 0;
2845}
2846
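/*
 * gaudi_init_cpu_queues() - pass the CPU PQ, event queue and CPU-accessible
 * DMA region addresses to the embedded CPU, signal it with a PI-update
 * event and poll mmCPU_IF_QUEUE_INIT until the CPU reports it is ready for
 * the host or the timeout expires.
 */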
2847static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2848{
2849 struct gaudi_device *gaudi = hdev->asic_specific;
2850 struct hl_eq *eq;
2851 u32 status;
2852 struct hl_hw_queue *cpu_pq =
2853 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2854 int err;
2855
2856 if (!hdev->cpu_queues_enable)
2857 return 0;
2858
2859 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2860 return 0;
2861
2862 eq = &hdev->event_queue;
2863
2864 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2865 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2866
2867 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2868 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2869
2870 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2871 lower_32_bits(hdev->cpu_accessible_dma_address));
2872 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2873 upper_32_bits(hdev->cpu_accessible_dma_address));
2874
2875 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2876 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2877 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2878
2879 /* Used for EQ CI */
2880 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2881
2882 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2883
2884 if (gaudi->multi_msi_mode)
2885 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2886 else
2887 WREG32(mmCPU_IF_QUEUE_INIT,
2888 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2889
2890 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2891
2892 err = hl_poll_timeout(
2893 hdev,
2894 mmCPU_IF_QUEUE_INIT,
2895 status,
2896 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2897 1000,
2898 cpu_timeout);
2899
2900 if (err) {
2901 dev_err(hdev->dev,
2902 "Failed to communicate with ARM CPU (ArmCP timeout)\n");
2903 return -EIO;
2904 }
2905
2906 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2907 return 0;
2908}
2909
2910static void gaudi_pre_hw_init(struct hl_device *hdev)
2911{
2912 /* Perform read from the device to make sure device is up */
2913 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2914
2915 /*
2916 * Let's mark in the H/W that we have reached this point. We check
2917 * this value in the reset_before_init function to understand whether
2918 * we need to reset the chip before doing H/W init. This register is
2919 * cleared by the H/W upon H/W reset
2920 */
2921 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2922
2923 /* Set the access through PCI bars (Linux driver only) as secured */
2924 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2925 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2926
2927 /* Perform read to flush the waiting writes to ensure configuration
2928 * was set in the device
2929 */
2930 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2931
2932 if (hdev->axi_drain) {
2933 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2934 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2935 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2936 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2937
2938 /* Perform read to flush the DRAIN cfg */
2939 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2940 } else {
2941 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2942 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2943
2944 /* Perform read to flush the DRAIN cfg */
2945 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2946 }
2947
2948 /* Configure the reset registers. Must be done as early as possible
2949 * in case we fail during H/W initialization
2950 */
2951 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2952 (CFG_RST_H_DMA_MASK |
2953 CFG_RST_H_MME_MASK |
2954 CFG_RST_H_SM_MASK |
2955 CFG_RST_H_TPC_MASK));
2956
2957 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2958
2959 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2960 (CFG_RST_H_HBM_MASK |
2961 CFG_RST_H_TPC_MASK |
2962 CFG_RST_H_NIC_MASK |
2963 CFG_RST_H_SM_MASK |
2964 CFG_RST_H_DMA_MASK |
2965 CFG_RST_H_MME_MASK |
2966 CFG_RST_H_CPU_MASK |
2967 CFG_RST_H_MMU_MASK));
2968
2969 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2970 (CFG_RST_L_IF_MASK |
2971 CFG_RST_L_PSOC_MASK |
2972 CFG_RST_L_TPC_MASK));
2973}
2974
2975static int gaudi_hw_init(struct hl_device *hdev)
2976{
2977 int rc;
2978
2979 dev_info(hdev->dev, "Starting initialization of H/W\n");
2980
2981 gaudi_pre_hw_init(hdev);
2982
2983 gaudi_init_pci_dma_qmans(hdev);
2984
2985 gaudi_init_hbm_dma_qmans(hdev);
2986
2987 /*
2988 * Before pushing u-boot/linux to the device, we need to set the
2989 * HBM BAR to the base address of the DRAM
2990 */
2991 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2992 dev_err(hdev->dev,
2993 "failed to map HBM bar to DRAM base address\n");
2994 return -EIO;
2995 }
2996
2997 rc = gaudi_init_cpu(hdev);
2998 if (rc) {
2999 dev_err(hdev->dev, "failed to initialize CPU\n");
3000 return rc;
3001 }
3002
3003 /* SRAM scrambler must be initialized after CPU is running from HBM */
3004 gaudi_init_scrambler_sram(hdev);
3005
3006 /* This is here just in case we are working without CPU */
3007 gaudi_init_scrambler_hbm(hdev);
3008
3009 gaudi_init_golden_registers(hdev);
3010
3011 rc = gaudi_mmu_init(hdev);
3012 if (rc)
3013 return rc;
3014
Omer Shpigelman3a3a5bf12020-05-11 10:45:12 +03003015 gaudi_init_security(hdev);
3016
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003017 gaudi_init_mme_qmans(hdev);
3018
3019 gaudi_init_tpc_qmans(hdev);
3020
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003021 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003022
3023 gaudi_enable_timestamp(hdev);
3024
3025 /* MSI must be enabled before CPU queues are initialized */
3026 rc = gaudi_enable_msi(hdev);
3027 if (rc)
3028 goto disable_queues;
3029
3030 /* must be called after MSI was enabled */
3031 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3032 if (rc) {
3033 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3034 rc);
3035 goto disable_msi;
3036 }
3037
3038 /* Perform read from the device to flush all configuration */
3039 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3040
3041 return 0;
3042
3043disable_msi:
3044 gaudi_disable_msi(hdev);
3045disable_queues:
3046 gaudi_disable_mme_qmans(hdev);
3047 gaudi_disable_pci_dma_qmans(hdev);
3048
3049 return rc;
3050}
3051
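/*
 * gaudi_hw_fini() - issue a soft or hard reset. For a hard reset, PCIe
 * re-initialization is suppressed, the boot-strap pins are adjusted to
 * work around an H/W bug and the BTL/BLR boot sequence is restarted before
 * SW_ALL_RST is asserted. After the wait, the hw_cap bits invalidated by
 * the reset are cleared.
 */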
3052static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3053{
3054 struct gaudi_device *gaudi = hdev->asic_specific;
3055 u32 status, reset_timeout_ms, boot_strap = 0;
3056
3057 if (hdev->pldm) {
3058 if (hard_reset)
3059 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3060 else
3061 reset_timeout_ms = GAUDI_PLDM_SRESET_TIMEOUT_MSEC;
3062 } else {
3063 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3064 }
3065
3066 if (hard_reset) {
3067 /* Tell ASIC not to re-initialize PCIe */
3068 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3069
3070 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3071 /* H/W bug WA:
3072 * rdata[31:0] = strap_read_val;
3073 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3074 */
3075 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3076 (boot_strap & 0x001FFFFF));
3077 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3078
3079 /* Restart BTL/BLR upon hard-reset */
3080 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3081
3082 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3083 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3084 dev_info(hdev->dev,
3085 "Issued HARD reset command, going to wait %dms\n",
3086 reset_timeout_ms);
3087 } else {
3088 /* Don't restart BTL/BLR upon soft-reset */
3089 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 0);
3090
3091 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST,
3092 1 << PSOC_GLOBAL_CONF_SOFT_RST_IND_SHIFT);
3093 dev_info(hdev->dev,
3094 "Issued SOFT reset command, going to wait %dms\n",
3095 reset_timeout_ms);
3096 }
3097
3098 /*
3099 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3100 * itself is in reset. We need to wait until the reset is deasserted
3101 */
3102 msleep(reset_timeout_ms);
3103
3104 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3105 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3106 dev_err(hdev->dev,
3107 "Timeout while waiting for device to reset 0x%x\n",
3108 status);
3109
3110 if (!hard_reset) {
3111 gaudi->hw_cap_initialized &= ~(HW_CAP_PCI_DMA | HW_CAP_MME |
3112 HW_CAP_TPC_MASK |
3113 HW_CAP_HBM_DMA);
3114
3115 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3116 GAUDI_EVENT_SOFT_RESET);
3117 return;
3118 }
3119
3120 /* We continue here only for hard-reset */
3121
3122 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3123
3124 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3125 HW_CAP_HBM | HW_CAP_PCI_DMA |
3126 HW_CAP_MME | HW_CAP_TPC_MASK |
3127 HW_CAP_HBM_DMA | HW_CAP_PLL |
3128 HW_CAP_MMU |
3129 HW_CAP_SRAM_SCRAMBLER |
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003130 HW_CAP_HBM_SCRAMBLER |
3131 HW_CAP_CLK_GATE);
3132
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003133 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3134}
3135
3136static int gaudi_suspend(struct hl_device *hdev)
3137{
3138 int rc;
3139
3140 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
3141 if (rc)
3142 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3143
3144 return rc;
3145}
3146
3147static int gaudi_resume(struct hl_device *hdev)
3148{
3149 return gaudi_init_iatu(hdev);
3150}
3151
3152static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3153 u64 kaddress, phys_addr_t paddress, u32 size)
3154{
3155 int rc;
3156
3157 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3158 VM_DONTCOPY | VM_NORESERVE;
3159
3160 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
3161 size, vma->vm_page_prot);
3162 if (rc)
3163 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
3164
3165 return rc;
3166}
3167
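/*
 * gaudi_ring_doorbell() - write the new PI to the doorbell register of the
 * given H/W queue. DMA queues derive the register from the channel's QMAN
 * offset, MME/TPC queues use fixed per-stream registers, and the CPU PQ
 * also raises a PI-update event towards the embedded CPU.
 */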
3168static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3169{
3170 struct gaudi_device *gaudi = hdev->asic_specific;
3171 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3172 int dma_id;
3173 bool invalid_queue = false;
3174
3175 switch (hw_queue_id) {
3176 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3177 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3178 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3179 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3180 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3181 break;
3182
3183 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3184 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3185 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3186 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3187 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3188 break;
3189
3190 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3191 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3192 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3193 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3194 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3195 break;
3196
3197 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3198 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3199 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3200 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3201 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3202 break;
3203
3204 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3205 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3206 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3207 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3208 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3209 break;
3210
3211 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3212 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3213 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3214 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3215 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3216 break;
3217
3218 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3219 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3220 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3221 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3222 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3223 break;
3224
3225 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3226 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3227 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3228 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3229 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3230 break;
3231
3232 case GAUDI_QUEUE_ID_CPU_PQ:
3233 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3234 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3235 else
3236 invalid_queue = true;
3237 break;
3238
3239 case GAUDI_QUEUE_ID_MME_0_0:
3240 db_reg_offset = mmMME2_QM_PQ_PI_0;
3241 break;
3242
3243 case GAUDI_QUEUE_ID_MME_0_1:
3244 db_reg_offset = mmMME2_QM_PQ_PI_1;
3245 break;
3246
3247 case GAUDI_QUEUE_ID_MME_0_2:
3248 db_reg_offset = mmMME2_QM_PQ_PI_2;
3249 break;
3250
3251 case GAUDI_QUEUE_ID_MME_0_3:
3252 db_reg_offset = mmMME2_QM_PQ_PI_3;
3253 break;
3254
3255 case GAUDI_QUEUE_ID_MME_1_0:
3256 db_reg_offset = mmMME0_QM_PQ_PI_0;
3257 break;
3258
3259 case GAUDI_QUEUE_ID_MME_1_1:
3260 db_reg_offset = mmMME0_QM_PQ_PI_1;
3261 break;
3262
3263 case GAUDI_QUEUE_ID_MME_1_2:
3264 db_reg_offset = mmMME0_QM_PQ_PI_2;
3265 break;
3266
3267 case GAUDI_QUEUE_ID_MME_1_3:
3268 db_reg_offset = mmMME0_QM_PQ_PI_3;
3269 break;
3270
3271 case GAUDI_QUEUE_ID_TPC_0_0:
3272 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3273 break;
3274
3275 case GAUDI_QUEUE_ID_TPC_0_1:
3276 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3277 break;
3278
3279 case GAUDI_QUEUE_ID_TPC_0_2:
3280 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3281 break;
3282
3283 case GAUDI_QUEUE_ID_TPC_0_3:
3284 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3285 break;
3286
3287 case GAUDI_QUEUE_ID_TPC_1_0:
3288 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3289 break;
3290
3291 case GAUDI_QUEUE_ID_TPC_1_1:
3292 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3293 break;
3294
3295 case GAUDI_QUEUE_ID_TPC_1_2:
3296 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3297 break;
3298
3299 case GAUDI_QUEUE_ID_TPC_1_3:
3300 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3301 break;
3302
3303 case GAUDI_QUEUE_ID_TPC_2_0:
3304 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3305 break;
3306
3307 case GAUDI_QUEUE_ID_TPC_2_1:
3308 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3309 break;
3310
3311 case GAUDI_QUEUE_ID_TPC_2_2:
3312 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3313 break;
3314
3315 case GAUDI_QUEUE_ID_TPC_2_3:
3316 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3317 break;
3318
3319 case GAUDI_QUEUE_ID_TPC_3_0:
3320 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3321 break;
3322
3323 case GAUDI_QUEUE_ID_TPC_3_1:
3324 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3325 break;
3326
3327 case GAUDI_QUEUE_ID_TPC_3_2:
3328 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3329 break;
3330
3331 case GAUDI_QUEUE_ID_TPC_3_3:
3332 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3333 break;
3334
3335 case GAUDI_QUEUE_ID_TPC_4_0:
3336 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3337 break;
3338
3339 case GAUDI_QUEUE_ID_TPC_4_1:
3340 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3341 break;
3342
3343 case GAUDI_QUEUE_ID_TPC_4_2:
3344 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3345 break;
3346
3347 case GAUDI_QUEUE_ID_TPC_4_3:
3348 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3349 break;
3350
3351 case GAUDI_QUEUE_ID_TPC_5_0:
3352 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3353 break;
3354
3355 case GAUDI_QUEUE_ID_TPC_5_1:
3356 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3357 break;
3358
3359 case GAUDI_QUEUE_ID_TPC_5_2:
3360 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3361 break;
3362
3363 case GAUDI_QUEUE_ID_TPC_5_3:
3364 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3365 break;
3366
3367 case GAUDI_QUEUE_ID_TPC_6_0:
3368 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3369 break;
3370
3371 case GAUDI_QUEUE_ID_TPC_6_1:
3372 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3373 break;
3374
3375 case GAUDI_QUEUE_ID_TPC_6_2:
3376 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3377 break;
3378
3379 case GAUDI_QUEUE_ID_TPC_6_3:
3380 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3381 break;
3382
3383 case GAUDI_QUEUE_ID_TPC_7_0:
3384 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3385 break;
3386
3387 case GAUDI_QUEUE_ID_TPC_7_1:
3388 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3389 break;
3390
3391 case GAUDI_QUEUE_ID_TPC_7_2:
3392 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3393 break;
3394
3395 case GAUDI_QUEUE_ID_TPC_7_3:
3396 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3397 break;
3398
3399 default:
3400 invalid_queue = true;
3401 }
3402
3403 if (invalid_queue) {
3404 /* Should never get here */
3405 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3406 hw_queue_id);
3407 return;
3408 }
3409
3410 db_value = pi;
3411
3412 /* ring the doorbell */
3413 WREG32(db_reg_offset, db_value);
3414
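	/*
	 * For the CPU queue, also raise a GIC interrupt so the embedded CPU
	 * firmware notices the PI update.
	 */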
3415 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3416 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3417 GAUDI_EVENT_PI_UPDATE);
3418}
3419
3420static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3421 struct hl_bd *bd)
3422{
3423 __le64 *pbd = (__le64 *) bd;
3424
3425	/* The QMANs are in host memory so a simple copy suffices */
3426 pqe[0] = pbd[0];
3427 pqe[1] = pbd[1];
3428}
3429
3430static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3431 dma_addr_t *dma_handle, gfp_t flags)
3432{
3433 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3434 dma_handle, flags);
3435
3436 /* Shift to the device's base physical address of host memory */
3437 if (kernel_addr)
3438 *dma_handle += HOST_PHYS_BASE;
3439
3440 return kernel_addr;
3441}
3442
3443static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3444 void *cpu_addr, dma_addr_t dma_handle)
3445{
3446 /* Cancel the device's base physical address of host memory */
3447 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3448
3449 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3450}
3451
3452static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3453 u32 queue_id, dma_addr_t *dma_handle,
3454 u16 *queue_len)
3455{
3456 struct gaudi_device *gaudi = hdev->asic_specific;
3457 struct gaudi_internal_qman_info *q;
3458
3459 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3460 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3461 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3462 return NULL;
3463 }
3464
3465 q = &gaudi->internal_qmans[queue_id];
3466 *dma_handle = q->pq_dma_addr;
3467 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3468
3469 return q->pq_kernel_addr;
3470}
3471
3472static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3473 u16 len, u32 timeout, long *result)
3474{
3475 struct gaudi_device *gaudi = hdev->asic_specific;
3476
3477 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3478 if (result)
3479 *result = 0;
3480 return 0;
3481 }
3482
Oded Gabbay788cacf2020-07-07 17:30:13 +03003483 if (!timeout)
3484 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3485
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003486 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3487 timeout, result);
3488}
3489
3490static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3491{
3492 struct packet_msg_prot *fence_pkt;
3493 dma_addr_t pkt_dma_addr;
3494 u32 fence_val, tmp, timeout_usec;
3495 dma_addr_t fence_dma_addr;
3496 u32 *fence_ptr;
3497 int rc;
3498
3499 if (hdev->pldm)
3500 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3501 else
3502 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3503
3504 fence_val = GAUDI_QMAN0_FENCE_VAL;
3505
3506 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3507 &fence_dma_addr);
3508 if (!fence_ptr) {
3509 dev_err(hdev->dev,
3510 "Failed to allocate memory for queue testing\n");
3511 return -ENOMEM;
3512 }
3513
3514 *fence_ptr = 0;
3515
3516 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3517 sizeof(struct packet_msg_prot),
3518 GFP_KERNEL, &pkt_dma_addr);
3519 if (!fence_pkt) {
3520 dev_err(hdev->dev,
3521 "Failed to allocate packet for queue testing\n");
3522 rc = -ENOMEM;
3523 goto free_fence_ptr;
3524 }
3525
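	/*
	 * Build a MSG_PROT packet that writes the fence value to the scratch
	 * buffer once the queue executes it (EB and MB are set for ordering)
	 */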
3526 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
3527 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
3528 (1 << GAUDI_PKT_CTL_MB_SHIFT);
3529 fence_pkt->ctl = cpu_to_le32(tmp);
3530 fence_pkt->value = cpu_to_le32(fence_val);
3531 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3532
3533 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3534 sizeof(struct packet_msg_prot),
3535 pkt_dma_addr);
3536 if (rc) {
3537 dev_err(hdev->dev,
3538 "Failed to send fence packet\n");
3539 goto free_pkt;
3540 }
3541
3542 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3543 1000, timeout_usec, true);
3544
3545 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3546
3547 if (rc == -ETIMEDOUT) {
3548 dev_err(hdev->dev,
3549 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3550 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3551 rc = -EIO;
3552 }
3553
3554free_pkt:
3555 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3556 pkt_dma_addr);
3557free_fence_ptr:
3558 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3559 fence_dma_addr);
3560 return rc;
3561}
3562
3563static int gaudi_test_cpu_queue(struct hl_device *hdev)
3564{
3565 struct gaudi_device *gaudi = hdev->asic_specific;
3566
3567 /*
3568	 * Check the capability here because send_cpu_message() returns success
3569	 * without actually querying the CPU when the capability isn't set
3570 */
3571 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3572 return 0;
3573
3574 return hl_fw_test_cpu_queue(hdev);
3575}
3576
3577static int gaudi_test_queues(struct hl_device *hdev)
3578{
3579 int i, rc, ret_val = 0;
3580
3581 for (i = 0 ; i < HL_MAX_QUEUES ; i++) {
3582 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3583 rc = gaudi_test_queue(hdev, i);
3584 if (rc)
3585 ret_val = -EINVAL;
3586 }
3587 }
3588
3589 rc = gaudi_test_cpu_queue(hdev);
3590 if (rc)
3591 ret_val = -EINVAL;
3592
3593 return ret_val;
3594}
3595
3596static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3597 gfp_t mem_flags, dma_addr_t *dma_handle)
3598{
3599 void *kernel_addr;
3600
3601 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3602 return NULL;
3603
3604 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3605
3606 /* Shift to the device's base physical address of host memory */
3607 if (kernel_addr)
3608 *dma_handle += HOST_PHYS_BASE;
3609
3610 return kernel_addr;
3611}
3612
3613static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3614 dma_addr_t dma_addr)
3615{
3616 /* Cancel the device's base physical address of host memory */
3617 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3618
3619 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3620}
3621
3622static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3623 size_t size, dma_addr_t *dma_handle)
3624{
3625 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3626}
3627
3628static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3629 size_t size, void *vaddr)
3630{
3631 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3632}
3633
3634static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3635 int nents, enum dma_data_direction dir)
3636{
3637 struct scatterlist *sg;
3638 int i;
3639
3640 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3641 return -ENOMEM;
3642
3643 /* Shift to the device's base physical address of host memory */
3644 for_each_sg(sgl, sg, nents, i)
3645 sg->dma_address += HOST_PHYS_BASE;
3646
3647 return 0;
3648}
3649
3650static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3651 int nents, enum dma_data_direction dir)
3652{
3653 struct scatterlist *sg;
3654 int i;
3655
3656 /* Cancel the device's base physical address of host memory */
3657 for_each_sg(sgl, sg, nents, i)
3658 sg->dma_address -= HOST_PHYS_BASE;
3659
3660 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3661}
3662
3663static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3664 struct sg_table *sgt)
3665{
3666 struct scatterlist *sg, *sg_next_iter;
3667 u32 count, dma_desc_cnt;
3668 u64 len, len_next;
3669 dma_addr_t addr, addr_next;
3670
3671 dma_desc_cnt = 0;
3672
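	/*
	 * Walk the SG list and merge physically contiguous entries, up to
	 * DMA_MAX_TRANSFER_SIZE per chunk. Each merged chunk will need one
	 * LIN_DMA packet in the patched CB.
	 */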
3673 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3674
3675 len = sg_dma_len(sg);
3676 addr = sg_dma_address(sg);
3677
3678 if (len == 0)
3679 break;
3680
3681 while ((count + 1) < sgt->nents) {
3682 sg_next_iter = sg_next(sg);
3683 len_next = sg_dma_len(sg_next_iter);
3684 addr_next = sg_dma_address(sg_next_iter);
3685
3686 if (len_next == 0)
3687 break;
3688
3689 if ((addr + len == addr_next) &&
3690 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3691 len += len_next;
3692 count++;
3693 sg = sg_next_iter;
3694 } else {
3695 break;
3696 }
3697 }
3698
3699 dma_desc_cnt++;
3700 }
3701
3702 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3703}
3704
3705static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3706 struct hl_cs_parser *parser,
3707 struct packet_lin_dma *user_dma_pkt,
3708 u64 addr, enum dma_data_direction dir)
3709{
3710 struct hl_userptr *userptr;
3711 int rc;
3712
3713 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3714 parser->job_userptr_list, &userptr))
3715 goto already_pinned;
3716
3717 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3718 if (!userptr)
3719 return -ENOMEM;
3720
3721 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3722 userptr);
3723 if (rc)
3724 goto free_userptr;
3725
3726 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3727
3728 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3729 userptr->sgt->nents, dir);
3730 if (rc) {
3731 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3732 goto unpin_memory;
3733 }
3734
3735 userptr->dma_mapped = true;
3736 userptr->dir = dir;
3737
3738already_pinned:
3739 parser->patched_cb_size +=
3740 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3741
3742 return 0;
3743
3744unpin_memory:
3745 hl_unpin_host_memory(hdev, userptr);
3746free_userptr:
3747 kfree(userptr);
3748 return rc;
3749}
3750
3751static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3752 struct hl_cs_parser *parser,
3753 struct packet_lin_dma *user_dma_pkt,
3754 bool src_in_host)
3755{
3756 enum dma_data_direction dir;
3757 bool skip_host_mem_pin = false, user_memset;
3758 u64 addr;
3759 int rc = 0;
3760
3761 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3762 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3763 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3764
3765 if (src_in_host) {
3766 if (user_memset)
3767 skip_host_mem_pin = true;
3768
3769 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3770 dir = DMA_TO_DEVICE;
3771 addr = le64_to_cpu(user_dma_pkt->src_addr);
3772 } else {
3773 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3774 dir = DMA_FROM_DEVICE;
3775 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3776 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3777 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3778 }
3779
3780 if (skip_host_mem_pin)
3781 parser->patched_cb_size += sizeof(*user_dma_pkt);
3782 else
3783 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3784 addr, dir);
3785
3786 return rc;
3787}
3788
3789static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3790 struct hl_cs_parser *parser,
3791 struct packet_lin_dma *user_dma_pkt)
3792{
3793 bool src_in_host = false;
3794 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3795 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3796 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3797
3798 dev_dbg(hdev->dev, "DMA packet details:\n");
3799 dev_dbg(hdev->dev, "source == 0x%llx\n",
3800 le64_to_cpu(user_dma_pkt->src_addr));
3801 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3802 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3803
3804 /*
3805 * Special handling for DMA with size 0. Bypass all validations
3806 * because no transactions will be done except for WR_COMP, which
3807 * is not a security issue
3808 */
3809 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3810 parser->patched_cb_size += sizeof(*user_dma_pkt);
3811 return 0;
3812 }
3813
3814 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3815 src_in_host = true;
3816
3817 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3818 src_in_host);
3819}
3820
Oded Gabbay64536ab2020-05-27 12:38:16 +03003821static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3822 struct hl_cs_parser *parser,
3823 struct packet_load_and_exe *user_pkt)
3824{
3825 u32 cfg;
3826
3827 cfg = le32_to_cpu(user_pkt->cfg);
3828
3829 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3830 dev_err(hdev->dev,
3831 "User not allowed to use Load and Execute\n");
3832 return -EPERM;
3833 }
3834
3835 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3836
3837 return 0;
3838}
3839
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003840static int gaudi_validate_cb(struct hl_device *hdev,
3841 struct hl_cs_parser *parser, bool is_mmu)
3842{
3843 u32 cb_parsed_length = 0;
3844 int rc = 0;
3845
3846 parser->patched_cb_size = 0;
3847
3848	/* user_cb_size is more than 0 so the loop will always be executed */
3849 while (cb_parsed_length < parser->user_cb_size) {
3850 enum packet_id pkt_id;
3851 u16 pkt_size;
3852 struct gaudi_packet *user_pkt;
3853
3854 user_pkt = (struct gaudi_packet *) (uintptr_t)
3855 (parser->user_cb->kernel_address + cb_parsed_length);
3856
3857 pkt_id = (enum packet_id) (
3858 (le64_to_cpu(user_pkt->header) &
3859 PACKET_HEADER_PACKET_ID_MASK) >>
3860 PACKET_HEADER_PACKET_ID_SHIFT);
3861
3862 pkt_size = gaudi_packet_sizes[pkt_id];
3863 cb_parsed_length += pkt_size;
3864 if (cb_parsed_length > parser->user_cb_size) {
3865 dev_err(hdev->dev,
3866 "packet 0x%x is out of CB boundary\n", pkt_id);
3867 rc = -EINVAL;
3868 break;
3869 }
3870
3871 switch (pkt_id) {
3872 case PACKET_MSG_PROT:
3873 dev_err(hdev->dev,
3874 "User not allowed to use MSG_PROT\n");
3875 rc = -EPERM;
3876 break;
3877
3878 case PACKET_CP_DMA:
3879 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3880 rc = -EPERM;
3881 break;
3882
3883 case PACKET_STOP:
3884 dev_err(hdev->dev, "User not allowed to use STOP\n");
3885 rc = -EPERM;
3886 break;
3887
Oded Gabbay2edc66e2020-07-03 19:28:54 +03003888 case PACKET_WREG_BULK:
3889 dev_err(hdev->dev,
3890 "User not allowed to use WREG_BULK\n");
3891 rc = -EPERM;
3892 break;
3893
Oded Gabbay64536ab2020-05-27 12:38:16 +03003894 case PACKET_LOAD_AND_EXE:
3895 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3896 (struct packet_load_and_exe *) user_pkt);
3897 break;
3898
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003899 case PACKET_LIN_DMA:
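			/*
			 * With MMU enabled the DMA addresses are virtual and the
			 * packet is copied as-is; otherwise it must be validated
			 * and host memory pinned before patching.
			 */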
3900 parser->contains_dma_pkt = true;
3901 if (is_mmu)
3902 parser->patched_cb_size += pkt_size;
3903 else
3904 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3905 (struct packet_lin_dma *) user_pkt);
3906 break;
3907
3908 case PACKET_WREG_32:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003909 case PACKET_MSG_LONG:
3910 case PACKET_MSG_SHORT:
3911 case PACKET_REPEAT:
3912 case PACKET_FENCE:
3913 case PACKET_NOP:
3914 case PACKET_ARB_POINT:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003915 parser->patched_cb_size += pkt_size;
3916 break;
3917
3918 default:
3919 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3920 pkt_id);
3921 rc = -EINVAL;
3922 break;
3923 }
3924
3925 if (rc)
3926 break;
3927 }
3928
3929 /*
3930 * The new CB should have space at the end for two MSG_PROT packets:
3931 * 1. A packet that will act as a completion packet
3932	 * 2. A packet that will generate an MSI-X interrupt
3933 */
3934 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3935
3936 return rc;
3937}
3938
3939static int gaudi_patch_dma_packet(struct hl_device *hdev,
3940 struct hl_cs_parser *parser,
3941 struct packet_lin_dma *user_dma_pkt,
3942 struct packet_lin_dma *new_dma_pkt,
3943 u32 *new_dma_pkt_size)
3944{
3945 struct hl_userptr *userptr;
3946 struct scatterlist *sg, *sg_next_iter;
3947 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3948 u64 len, len_next;
3949 dma_addr_t dma_addr, dma_addr_next;
3950 u64 device_memory_addr, addr;
3951 enum dma_data_direction dir;
3952 struct sg_table *sgt;
3953 bool src_in_host = false;
3954 bool skip_host_mem_pin = false;
3955 bool user_memset;
3956
3957 ctl = le32_to_cpu(user_dma_pkt->ctl);
3958
3959 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3960 src_in_host = true;
3961
3962 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3963 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3964
3965 if (src_in_host) {
3966 addr = le64_to_cpu(user_dma_pkt->src_addr);
3967 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3968 dir = DMA_TO_DEVICE;
3969 if (user_memset)
3970 skip_host_mem_pin = true;
3971 } else {
3972 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3973 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3974 dir = DMA_FROM_DEVICE;
3975 }
3976
3977 if ((!skip_host_mem_pin) &&
3978 (!hl_userptr_is_pinned(hdev, addr,
3979 le32_to_cpu(user_dma_pkt->tsize),
3980 parser->job_userptr_list, &userptr))) {
3981 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3982			addr, le32_to_cpu(user_dma_pkt->tsize));
3983 return -EFAULT;
3984 }
3985
3986 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3987 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3988 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3989 return 0;
3990 }
3991
3992 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3993
3994 sgt = userptr->sgt;
3995 dma_desc_cnt = 0;
3996
3997 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3998 len = sg_dma_len(sg);
3999 dma_addr = sg_dma_address(sg);
4000
4001 if (len == 0)
4002 break;
4003
4004 while ((count + 1) < sgt->nents) {
4005 sg_next_iter = sg_next(sg);
4006 len_next = sg_dma_len(sg_next_iter);
4007 dma_addr_next = sg_dma_address(sg_next_iter);
4008
4009 if (len_next == 0)
4010 break;
4011
4012 if ((dma_addr + len == dma_addr_next) &&
4013 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4014 len += len_next;
4015 count++;
4016 sg = sg_next_iter;
4017 } else {
4018 break;
4019 }
4020 }
4021
4022 new_dma_pkt->ctl = user_dma_pkt->ctl;
4023
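		/*
		 * Keep the engine barrier only on the first generated packet.
		 * Write-completion is disabled here for all packets and
		 * restored on the last one below, per the user's setting.
		 */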
4024 ctl = le32_to_cpu(user_dma_pkt->ctl);
4025 if (likely(dma_desc_cnt))
4026 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
4027 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
4028 new_dma_pkt->ctl = cpu_to_le32(ctl);
4029 new_dma_pkt->tsize = cpu_to_le32(len);
4030
4031 if (dir == DMA_TO_DEVICE) {
4032 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
4033 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
4034 } else {
4035 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
4036 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
4037 }
4038
4039 if (!user_memset)
4040 device_memory_addr += len;
4041 dma_desc_cnt++;
4042 new_dma_pkt++;
4043 }
4044
4045 if (!dma_desc_cnt) {
4046 dev_err(hdev->dev,
4047 "Error of 0 SG entries when patching DMA packet\n");
4048 return -EFAULT;
4049 }
4050
4051 /* Fix the last dma packet - wrcomp must be as user set it */
4052 new_dma_pkt--;
4053 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4054
4055 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4056
4057 return 0;
4058}
4059
4060static int gaudi_patch_cb(struct hl_device *hdev,
4061 struct hl_cs_parser *parser)
4062{
4063 u32 cb_parsed_length = 0;
4064 u32 cb_patched_cur_length = 0;
4065 int rc = 0;
4066
4067	/* user_cb_size is more than 0 so the loop will always be executed */
4068 while (cb_parsed_length < parser->user_cb_size) {
4069 enum packet_id pkt_id;
4070 u16 pkt_size;
4071 u32 new_pkt_size = 0;
4072 struct gaudi_packet *user_pkt, *kernel_pkt;
4073
4074 user_pkt = (struct gaudi_packet *) (uintptr_t)
4075 (parser->user_cb->kernel_address + cb_parsed_length);
4076 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
4077 (parser->patched_cb->kernel_address +
4078 cb_patched_cur_length);
4079
4080 pkt_id = (enum packet_id) (
4081 (le64_to_cpu(user_pkt->header) &
4082 PACKET_HEADER_PACKET_ID_MASK) >>
4083 PACKET_HEADER_PACKET_ID_SHIFT);
4084
4085 pkt_size = gaudi_packet_sizes[pkt_id];
4086 cb_parsed_length += pkt_size;
4087 if (cb_parsed_length > parser->user_cb_size) {
4088 dev_err(hdev->dev,
4089 "packet 0x%x is out of CB boundary\n", pkt_id);
4090 rc = -EINVAL;
4091 break;
4092 }
4093
4094 switch (pkt_id) {
4095 case PACKET_LIN_DMA:
4096 rc = gaudi_patch_dma_packet(hdev, parser,
4097 (struct packet_lin_dma *) user_pkt,
4098 (struct packet_lin_dma *) kernel_pkt,
4099 &new_pkt_size);
4100 cb_patched_cur_length += new_pkt_size;
4101 break;
4102
4103 case PACKET_MSG_PROT:
4104 dev_err(hdev->dev,
4105 "User not allowed to use MSG_PROT\n");
4106 rc = -EPERM;
4107 break;
4108
4109 case PACKET_CP_DMA:
4110 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4111 rc = -EPERM;
4112 break;
4113
4114 case PACKET_STOP:
4115 dev_err(hdev->dev, "User not allowed to use STOP\n");
4116 rc = -EPERM;
4117 break;
4118
4119 case PACKET_WREG_32:
4120 case PACKET_WREG_BULK:
4121 case PACKET_MSG_LONG:
4122 case PACKET_MSG_SHORT:
4123 case PACKET_REPEAT:
4124 case PACKET_FENCE:
4125 case PACKET_NOP:
4126 case PACKET_ARB_POINT:
4127 case PACKET_LOAD_AND_EXE:
4128 memcpy(kernel_pkt, user_pkt, pkt_size);
4129 cb_patched_cur_length += pkt_size;
4130 break;
4131
4132 default:
4133 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4134 pkt_id);
4135 rc = -EINVAL;
4136 break;
4137 }
4138
4139 if (rc)
4140 break;
4141 }
4142
4143 return rc;
4144}
4145
4146static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4147 struct hl_cs_parser *parser)
4148{
4149 u64 patched_cb_handle;
4150 u32 patched_cb_size;
4151 struct hl_cb *user_cb;
4152 int rc;
4153
4154 /*
4155	 * The new CB should have space at the end for two MSG_PROT packets:
4156	 * 1. A packet that will act as a completion packet
4157	 * 2. A packet that will generate an MSI interrupt
4158 */
4159 parser->patched_cb_size = parser->user_cb_size +
4160 sizeof(struct packet_msg_prot) * 2;
4161
4162 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4163 parser->patched_cb_size,
4164 &patched_cb_handle, HL_KERNEL_ASID_ID);
4165
4166 if (rc) {
4167 dev_err(hdev->dev,
4168 "Failed to allocate patched CB for DMA CS %d\n",
4169 rc);
4170 return rc;
4171 }
4172
4173 patched_cb_handle >>= PAGE_SHIFT;
4174 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4175 (u32) patched_cb_handle);
4176 /* hl_cb_get should never fail here so use kernel WARN */
4177 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4178 (u32) patched_cb_handle);
4179 if (!parser->patched_cb) {
4180 rc = -EFAULT;
4181 goto out;
4182 }
4183
4184 /*
4185 * The check that parser->user_cb_size <= parser->user_cb->size was done
4186 * in validate_queue_index().
4187 */
4188 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4189 (void *) (uintptr_t) parser->user_cb->kernel_address,
4190 parser->user_cb_size);
4191
4192 patched_cb_size = parser->patched_cb_size;
4193
4194 /* Validate patched CB instead of user CB */
4195 user_cb = parser->user_cb;
4196 parser->user_cb = parser->patched_cb;
4197 rc = gaudi_validate_cb(hdev, parser, true);
4198 parser->user_cb = user_cb;
4199
4200 if (rc) {
4201 hl_cb_put(parser->patched_cb);
4202 goto out;
4203 }
4204
4205 if (patched_cb_size != parser->patched_cb_size) {
4206 dev_err(hdev->dev, "user CB size mismatch\n");
4207 hl_cb_put(parser->patched_cb);
4208 rc = -EINVAL;
4209 goto out;
4210 }
4211
4212out:
4213 /*
4214	 * Always call cb destroy here because we still hold one reference
4215	 * to it from the earlier cb_get. After the job completes, cb_put
4216	 * will release it, but here we want to remove it from the
4217	 * idr
4218 */
4219 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4220 patched_cb_handle << PAGE_SHIFT);
4221
4222 return rc;
4223}
4224
4225static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4226 struct hl_cs_parser *parser)
4227{
4228 u64 patched_cb_handle;
4229 int rc;
4230
4231 rc = gaudi_validate_cb(hdev, parser, false);
4232
4233 if (rc)
4234 goto free_userptr;
4235
4236 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4237 parser->patched_cb_size,
4238 &patched_cb_handle, HL_KERNEL_ASID_ID);
4239 if (rc) {
4240 dev_err(hdev->dev,
4241 "Failed to allocate patched CB for DMA CS %d\n", rc);
4242 goto free_userptr;
4243 }
4244
4245 patched_cb_handle >>= PAGE_SHIFT;
4246 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4247 (u32) patched_cb_handle);
4248 /* hl_cb_get should never fail here so use kernel WARN */
4249 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4250 (u32) patched_cb_handle);
4251 if (!parser->patched_cb) {
4252 rc = -EFAULT;
4253 goto out;
4254 }
4255
4256 rc = gaudi_patch_cb(hdev, parser);
4257
4258 if (rc)
4259 hl_cb_put(parser->patched_cb);
4260
4261out:
4262 /*
4263	 * Always call cb destroy here because we still hold one reference
4264	 * to it from the earlier cb_get. After the job completes, cb_put
4265	 * will release it, but here we want to remove it from the
4266	 * idr
4267 */
4268 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4269 patched_cb_handle << PAGE_SHIFT);
4270
4271free_userptr:
4272 if (rc)
4273 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4274 return rc;
4275}
4276
4277static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4278 struct hl_cs_parser *parser)
4279{
4280 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4281
4282 /* For internal queue jobs just check if CB address is valid */
4283 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4284 parser->user_cb_size,
4285 asic_prop->sram_user_base_address,
4286 asic_prop->sram_end_address))
4287 return 0;
4288
4289 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4290 parser->user_cb_size,
4291 asic_prop->dram_user_base_address,
4292 asic_prop->dram_end_address))
4293 return 0;
4294
4295 /* PMMU and HPMMU addresses are equal, check only one of them */
4296 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4297 parser->user_cb_size,
4298 asic_prop->pmmu.start_addr,
4299 asic_prop->pmmu.end_addr))
4300 return 0;
4301
4302 dev_err(hdev->dev,
4303 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4304 parser->user_cb, parser->user_cb_size);
4305
4306 return -EFAULT;
4307}
4308
4309static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4310{
4311 struct gaudi_device *gaudi = hdev->asic_specific;
4312
4313 if (parser->queue_type == QUEUE_TYPE_INT)
4314 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4315
4316 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4317 return gaudi_parse_cb_mmu(hdev, parser);
4318 else
4319 return gaudi_parse_cb_no_mmu(hdev, parser);
4320}
4321
4322static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4323 u64 kernel_address, u32 len,
4324 u64 cq_addr, u32 cq_val, u32 msi_vec,
4325 bool eb)
4326{
4327 struct gaudi_device *gaudi = hdev->asic_specific;
4328 struct packet_msg_prot *cq_pkt;
4329 u32 tmp;
4330
4331 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4332 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4333
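	/* First MSG_PROT packet: write the completion value to the CQ */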
4334 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4335 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4336
4337 if (eb)
4338 tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT);
4339
4340 cq_pkt->ctl = cpu_to_le32(tmp);
4341 cq_pkt->value = cpu_to_le32(cq_val);
4342 cq_pkt->addr = cpu_to_le64(cq_addr);
4343
4344 cq_pkt++;
4345
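	/*
	 * Second MSG_PROT packet: write to the PCIe MSI doorbell to raise
	 * interrupt msi_vec (vector 0 when multi-MSI mode is disabled)
	 */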
4346 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4347 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4348 cq_pkt->ctl = cpu_to_le32(tmp);
4349 cq_pkt->value = cpu_to_le32(1);
4350
4351 if (!gaudi->multi_msi_mode)
4352 msi_vec = 0;
4353
4354 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4355}
4356
4357static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4358{
4359 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4360}
4361
4362static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4363 u32 size, u64 val)
4364{
4365 struct packet_lin_dma *lin_dma_pkt;
4366 struct hl_cs_job *job;
4367 u32 cb_size, ctl;
4368 struct hl_cb *cb;
4369 int rc;
4370
4371 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
4372 if (!cb)
4373 return -EFAULT;
4374
4375 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4376 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4377 cb_size = sizeof(*lin_dma_pkt);
4378
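	/*
	 * Build a single LIN_DMA packet in memset mode: src_addr carries the
	 * 64-bit fill pattern, dst_addr/tsize describe the region to fill
	 */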
4379 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4380 (1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4381 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
4382 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
4383 (1 << GAUDI_PKT_CTL_MB_SHIFT));
4384 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4385 lin_dma_pkt->src_addr = cpu_to_le64(val);
4386 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4387 lin_dma_pkt->tsize = cpu_to_le32(size);
4388
4389 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4390 if (!job) {
4391 dev_err(hdev->dev, "Failed to allocate a new job\n");
4392 rc = -ENOMEM;
4393 goto release_cb;
4394 }
4395
4396 job->id = 0;
4397 job->user_cb = cb;
4398 job->user_cb->cs_cnt++;
4399 job->user_cb_size = cb_size;
4400 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4401 job->patched_cb = job->user_cb;
4402 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4403
4404 hl_debugfs_add_job(hdev, job);
4405
4406 rc = gaudi_send_job_on_qman0(hdev, job);
4407
4408 hl_debugfs_remove_job(hdev, job);
4409 kfree(job);
4410 cb->cs_cnt--;
4411
4412release_cb:
4413 hl_cb_put(cb);
4414 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4415
4416 return rc;
4417}
4418
4419static void gaudi_restore_sm_registers(struct hl_device *hdev)
4420{
4421 int i;
4422
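	/*
	 * Clear the sync objects and monitor status registers. In the W_S
	 * block only the entries from the first-available index onward are
	 * cleared, as the lower ones are presumably reserved for driver use.
	 */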
4423 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4424 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4425 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4426 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4427 }
4428
4429 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4430 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4431 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4432 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4433 }
4434
4435 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4436
4437 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4438 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4439
4440 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4441
4442 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4443 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4444}
4445
4446static void gaudi_restore_dma_registers(struct hl_device *hdev)
4447{
4448 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4449 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4450 int i;
4451
4452 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4453 u64 sob_addr = CFG_BASE +
4454 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4455 (i * sob_delta);
4456 u32 dma_offset = i * DMA_CORE_OFFSET;
4457
4458 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4459 lower_32_bits(sob_addr));
4460 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4461 upper_32_bits(sob_addr));
4462 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4463
4464 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4465 * modified by the user for SRAM reduction
4466 */
4467 if (i > 1)
4468 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4469 0x00000001);
4470 }
4471}
4472
4473static void gaudi_restore_qm_registers(struct hl_device *hdev)
4474{
4475 u32 qman_offset;
4476 int i;
4477
4478 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4479 qman_offset = i * DMA_QMAN_OFFSET;
4480 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4481 }
4482
4483 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4484 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4485 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4486 }
4487
4488 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4489 qman_offset = i * TPC_QMAN_OFFSET;
4490 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4491 }
4492}
4493
4494static void gaudi_restore_user_registers(struct hl_device *hdev)
4495{
4496 gaudi_restore_sm_registers(hdev);
4497 gaudi_restore_dma_registers(hdev);
4498 gaudi_restore_qm_registers(hdev);
4499}
4500
4501static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4502{
4503 struct asic_fixed_properties *prop = &hdev->asic_prop;
4504 u64 addr = prop->sram_user_base_address;
4505 u32 size = hdev->pldm ? 0x10000 :
4506 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4507 u64 val = 0x7777777777777777ull;
4508 int rc;
4509
4510 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4511 if (rc) {
4512 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4513 return rc;
4514 }
4515
4516 gaudi_mmu_prepare(hdev, asid);
4517
4518 gaudi_restore_user_registers(hdev);
4519
4520 return 0;
4521}
4522
4523static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4524{
4525 struct asic_fixed_properties *prop = &hdev->asic_prop;
4526 struct gaudi_device *gaudi = hdev->asic_specific;
4527 u64 addr = prop->mmu_pgt_addr;
4528 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4529
4530 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4531 return 0;
4532
4533 return gaudi_memset_device_memory(hdev, addr, size, 0);
4534}
4535
4536static void gaudi_restore_phase_topology(struct hl_device *hdev)
4537{
4538
4539}
4540
4541static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4542{
4543 struct asic_fixed_properties *prop = &hdev->asic_prop;
4544 struct gaudi_device *gaudi = hdev->asic_specific;
4545 u64 hbm_bar_addr;
4546 int rc = 0;
4547
4548 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004549
4550 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4551 (hdev->clock_gating_mask &
4552 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4553
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004554 dev_err_ratelimited(hdev->dev,
4555 "Can't read register - clock gating is enabled!\n");
4556 rc = -EFAULT;
4557 } else {
4558 *val = RREG32(addr - CFG_BASE);
4559 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004560
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004561 } else if ((addr >= SRAM_BASE_ADDR) &&
4562 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4563 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4564 (addr - SRAM_BASE_ADDR));
4565 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4566 u64 bar_base_addr = DRAM_PHYS_BASE +
4567 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4568
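		/*
		 * The HBM BAR covers only part of DRAM, so temporarily
		 * re-point it at the region containing addr, read through it,
		 * then restore the previous BAR base
		 */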
4569 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4570 if (hbm_bar_addr != U64_MAX) {
4571 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4572 (addr - bar_base_addr));
4573
4574 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4575 hbm_bar_addr);
4576 }
4577 if (hbm_bar_addr == U64_MAX)
4578 rc = -EIO;
4579 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4580 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4581 } else {
4582 rc = -EFAULT;
4583 }
4584
4585 return rc;
4586}
4587
4588static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4589{
4590 struct asic_fixed_properties *prop = &hdev->asic_prop;
4591 struct gaudi_device *gaudi = hdev->asic_specific;
4592 u64 hbm_bar_addr;
4593 int rc = 0;
4594
4595 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004596
4597 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4598 (hdev->clock_gating_mask &
4599 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4600
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004601 dev_err_ratelimited(hdev->dev,
4602 "Can't write register - clock gating is enabled!\n");
4603 rc = -EFAULT;
4604 } else {
4605 WREG32(addr - CFG_BASE, val);
4606 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004607
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004608 } else if ((addr >= SRAM_BASE_ADDR) &&
4609 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4610 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4611 (addr - SRAM_BASE_ADDR));
4612 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4613 u64 bar_base_addr = DRAM_PHYS_BASE +
4614 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4615
4616 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4617 if (hbm_bar_addr != U64_MAX) {
4618 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4619 (addr - bar_base_addr));
4620
4621 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4622 hbm_bar_addr);
4623 }
4624 if (hbm_bar_addr == U64_MAX)
4625 rc = -EIO;
4626 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4627 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4628 } else {
4629 rc = -EFAULT;
4630 }
4631
4632 return rc;
4633}
4634
4635static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4636{
4637 struct asic_fixed_properties *prop = &hdev->asic_prop;
4638 struct gaudi_device *gaudi = hdev->asic_specific;
4639 u64 hbm_bar_addr;
4640 int rc = 0;
4641
4642 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004643
4644 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4645 (hdev->clock_gating_mask &
4646 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4647
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004648 dev_err_ratelimited(hdev->dev,
4649 "Can't read register - clock gating is enabled!\n");
4650 rc = -EFAULT;
4651 } else {
4652 u32 val_l = RREG32(addr - CFG_BASE);
4653 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4654
4655 *val = (((u64) val_h) << 32) | val_l;
4656 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004657
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004658 } else if ((addr >= SRAM_BASE_ADDR) &&
4659 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4660 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4661 (addr - SRAM_BASE_ADDR));
4662 } else if (addr <=
4663 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4664 u64 bar_base_addr = DRAM_PHYS_BASE +
4665 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4666
4667 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4668 if (hbm_bar_addr != U64_MAX) {
4669 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4670 (addr - bar_base_addr));
4671
4672 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4673 hbm_bar_addr);
4674 }
4675 if (hbm_bar_addr == U64_MAX)
4676 rc = -EIO;
4677 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4678 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4679 } else {
4680 rc = -EFAULT;
4681 }
4682
4683 return rc;
4684}
4685
4686static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4687{
4688 struct asic_fixed_properties *prop = &hdev->asic_prop;
4689 struct gaudi_device *gaudi = hdev->asic_specific;
4690 u64 hbm_bar_addr;
4691 int rc = 0;
4692
4693 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004694
4695 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4696 (hdev->clock_gating_mask &
4697 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4698
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004699 dev_err_ratelimited(hdev->dev,
4700 "Can't write register - clock gating is enabled!\n");
4701 rc = -EFAULT;
4702 } else {
4703 WREG32(addr - CFG_BASE, lower_32_bits(val));
4704 WREG32(addr + sizeof(u32) - CFG_BASE,
4705 upper_32_bits(val));
4706 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004707
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004708 } else if ((addr >= SRAM_BASE_ADDR) &&
4709 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4710 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4711 (addr - SRAM_BASE_ADDR));
4712 } else if (addr <=
4713 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4714 u64 bar_base_addr = DRAM_PHYS_BASE +
4715 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4716
4717 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4718 if (hbm_bar_addr != U64_MAX) {
4719 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4720 (addr - bar_base_addr));
4721
4722 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4723 hbm_bar_addr);
4724 }
4725 if (hbm_bar_addr == U64_MAX)
4726 rc = -EIO;
4727 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4728 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4729 } else {
4730 rc = -EFAULT;
4731 }
4732
4733 return rc;
4734}
4735
4736static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4737{
4738 struct gaudi_device *gaudi = hdev->asic_specific;
4739
4740 if (hdev->hard_reset_pending)
4741 return U64_MAX;
4742
4743 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4744 (addr - gaudi->hbm_bar_cur_addr));
4745}
4746
4747static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4748{
4749 struct gaudi_device *gaudi = hdev->asic_specific;
4750
4751 if (hdev->hard_reset_pending)
4752 return;
4753
4754 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4755 (addr - gaudi->hbm_bar_cur_addr));
4756}
4757
4758static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4759{
4760 /* mask to zero the MMBP and ASID bits */
4761 WREG32_AND(reg, ~0x7FF);
4762 WREG32_OR(reg, asid);
4763}
4764
4765static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4766{
4767 struct gaudi_device *gaudi = hdev->asic_specific;
4768
4769 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4770 return;
4771
4772 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4773 WARN(1, "asid %u is too big\n", asid);
4774 return;
4775 }
4776
4777 mutex_lock(&gaudi->clk_gate_mutex);
4778
4779 hdev->asic_funcs->disable_clock_gating(hdev);
4780
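	/*
	 * Program the context's ASID (and clear the MMU bypass bit) in the
	 * non-secure AXI user registers of every initiator - DMA QMANs and
	 * cores, TPCs, MMEs and the PSOC trace unit - so their transactions
	 * are translated by the MMU with this ASID.
	 */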
4781 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4782 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4783 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4784 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4785 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4786
4787 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4788 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4789 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4790 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4791 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4792
4793 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4794 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4795 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4796 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4797 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4798
4799 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4800 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4801 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4802 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4803 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4804
4805 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4806 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4807 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4808 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4809 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4810
4811 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4812 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4813 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4814 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4815 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4816
4817 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4818 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4819 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4820 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4821 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4822
4823 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4824 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4825 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4826 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4827 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4828
4829 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4830 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4831 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4832 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4833 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4834 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4835 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4836 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4837
4838 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4839 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4840 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4841 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4842 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4843 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4844 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4845
4846 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4847 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4848 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4849 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4850 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4851 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4852 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4853
4854 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4855 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4856 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4857 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4858 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4859 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4860 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4861
4862 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4863 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4864 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4865 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4866 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4867 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4868 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4869
4870 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4871 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4872 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4873 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4874 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4875 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4876 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4877
4878 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4879 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4880 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4881 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4882 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4883 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4884 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4885
4886 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4887 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4888 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4889 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4890 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4891 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4892 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4893
4894 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4895 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4896 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4897 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4898 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4899 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4900 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4901
4902 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4903 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4904 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4905 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4906 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4907 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4908 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4909 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4910 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4911 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4912
4913 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4914 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4915 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4916 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4917 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4918 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4919 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4920 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4921 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4922 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4923 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4924 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4925
4926 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4927 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4928
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004929 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004930
4931 mutex_unlock(&gaudi->clk_gate_mutex);
4932}
4933
4934static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4935 struct hl_cs_job *job)
4936{
4937 struct packet_msg_prot *fence_pkt;
4938 u32 *fence_ptr;
4939 dma_addr_t fence_dma_addr;
4940 struct hl_cb *cb;
4941 u32 tmp, timeout, dma_offset;
4942 int rc;
4943
4944 if (hdev->pldm)
4945 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4946 else
4947 timeout = HL_DEVICE_TIMEOUT_USEC;
4948
4949 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4950 dev_err_ratelimited(hdev->dev,
4951 "Can't send driver job on QMAN0 because the device is not idle\n");
4952 return -EBUSY;
4953 }
4954
4955 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4956 &fence_dma_addr);
4957 if (!fence_ptr) {
4958 dev_err(hdev->dev,
4959 "Failed to allocate fence memory for QMAN0\n");
4960 return -ENOMEM;
4961 }
4962
4963 cb = job->patched_cb;
4964
4965 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4966 job->job_cb_size - sizeof(struct packet_msg_prot));
4967
4968 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4969 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
4970 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4971 fence_pkt->ctl = cpu_to_le32(tmp);
4972 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4973 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4974
4975 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4976
4977 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4978
4979 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4980 job->job_cb_size, cb->bus_address);
4981 if (rc) {
4982 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4983 goto free_fence_ptr;
4984 }
4985
4986 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4987 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4988 timeout, true);
4989
4990 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4991
4992 if (rc == -ETIMEDOUT) {
4993 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4994 goto free_fence_ptr;
4995 }
4996
4997free_fence_ptr:
4998 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4999 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
5000
5001 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5002 fence_dma_addr);
5003 return rc;
5004}
5005
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005006static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
5007{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005008 if (event_type >= GAUDI_EVENT_SIZE)
5009 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005010
Ofir Bittonebd8d122020-05-10 13:41:28 +03005011 if (!gaudi_irq_map_table[event_type].valid)
5012 goto event_not_supported;
5013
5014	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
5015
5016 return;
5017
5018event_not_supported:
5019 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005020}
5021
5022static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5023 u32 x_y, bool is_write)
5024{
5025 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5026
5027 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5028 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5029
5030 switch (x_y) {
5031 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5032 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5033 dma_id[0] = 0;
5034 dma_id[1] = 2;
5035 break;
5036 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5037 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5038 dma_id[0] = 1;
5039 dma_id[1] = 3;
5040 break;
5041 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5042 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5043 dma_id[0] = 4;
5044 dma_id[1] = 6;
5045 break;
5046 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5047 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5048 dma_id[0] = 5;
5049 dma_id[1] = 7;
5050 break;
5051 default:
5052 goto unknown_initiator;
5053 }
5054
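	/*
	 * Each DMA_IF port is shared by two DMA engines; read both engines'
	 * ERR_CAUSE registers to tell which one issued the offending access
	 */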
5055 for (i = 0 ; i < 2 ; i++) {
5056 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5057 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5058 }
5059
5060 switch (x_y) {
5061 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5062 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5063 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5064 return "DMA0";
5065 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5066 return "DMA2";
5067 else
5068 return "DMA0 or DMA2";
5069 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5070 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5071 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5072 return "DMA1";
5073 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5074 return "DMA3";
5075 else
5076 return "DMA1 or DMA3";
5077 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5078 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5079 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5080 return "DMA4";
5081 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5082 return "DMA6";
5083 else
5084 return "DMA4 or DMA6";
5085 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5086 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5087 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5088 return "DMA5";
5089 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5090 return "DMA7";
5091 else
5092 return "DMA5 or DMA7";
5093 }
5094
5095unknown_initiator:
5096 return "unknown initiator";
5097}
5098
5099static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5100 bool is_write)
5101{
5102 u32 val, x_y, axi_id;
5103
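	/*
	 * The RAZWI WRITE_ID/READ_ID register encodes the initiator's router
	 * X/Y coordinates plus an AXI ID; together these identify the engine
	 * that issued the illegal transaction.
	 */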
5104 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5105 RREG32(mmMMU_UP_RAZWI_READ_ID);
5106 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5107 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5108 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5109 RAZWI_INITIATOR_AXI_ID_SHIFT);
5110
5111 switch (x_y) {
5112 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5113 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5114 return "TPC0";
5115 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5116 return "NIC0";
5117 break;
5118 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5119 return "TPC1";
5120 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5121 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5122 return "MME0";
5123 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5124 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5125 return "MME1";
5126 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5127 return "TPC2";
5128 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5129 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5130 return "TPC3";
5131 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5132 return "PCI";
5133 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5134 return "CPU";
5135 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5136 return "PSOC";
5137 break;
5138 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5139 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5140 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5141 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5142 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5143 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5144 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5145 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5146 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5147 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5148 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5149 return "TPC4";
5150 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5151 return "NIC1";
5152 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5153 return "NIC2";
5154 break;
5155 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5156 return "TPC5";
5157 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5158 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5159 return "MME2";
5160 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5161 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5162 return "MME3";
5163 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5164 return "TPC6";
5165 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5166 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5167 return "TPC7";
5168 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5169 return "NIC4";
5170 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5171 return "NIC5";
5172 break;
5173 default:
5174 break;
5175 }
5176
5177 dev_err(hdev->dev,
5178 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5179 val,
5180 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5181 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5182 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5183 RAZWI_INITIATOR_AXI_ID_MASK);
5184
5185 return "unknown initiator";
5186}
5187
5188static void gaudi_print_razwi_info(struct hl_device *hdev)
5189{
5190 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5191 dev_err_ratelimited(hdev->dev,
5192 "RAZWI event caused by illegal write of %s\n",
5193 gaudi_get_razwi_initiator_name(hdev, true));
5194 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5195 }
5196
5197 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5198 dev_err_ratelimited(hdev->dev,
5199 "RAZWI event caused by illegal read of %s\n",
5200 gaudi_get_razwi_initiator_name(hdev, false));
5201 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5202 }
5203}
5204
5205static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5206{
5207 struct gaudi_device *gaudi = hdev->asic_specific;
5208 u64 addr;
5209 u32 val;
5210
5211 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5212 return;
5213
5214 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5215 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5216 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5217 addr <<= 32;
5218 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5219
5220 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5221 addr);
5222
5223 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5224 }
5225
5226 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5227 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5228 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5229 addr <<= 32;
5230 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5231
5232 dev_err_ratelimited(hdev->dev,
5233 "MMU access error on va 0x%llx\n", addr);
5234
5235 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5236 }
5237}
5238
5239/*
5240 * +-------------------+------------------------------------------------------+
5241 * | Configuration Reg | Description |
5242 * | Address | |
5243 * +-------------------+------------------------------------------------------+
5244 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5245 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5246 * | |0xF34 memory wrappers 63:32 |
5247 * | |0xF38 memory wrappers 95:64 |
5248 * | |0xF3C memory wrappers 127:96 |
5249 * +-------------------+------------------------------------------------------+
5250 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5251 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5252 * | |0xF44 memory wrappers 63:32 |
5253 * | |0xF48 memory wrappers 95:64 |
5254 * | |0xF4C memory wrappers 127:96 |
5255 * +-------------------+------------------------------------------------------+
5256 */
5257static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
5258 const char *block_name,
5259 u64 block_address, int num_memories,
5260 bool derr, bool disable_clock_gating)
5261{
5262 struct gaudi_device *gaudi = hdev->asic_specific;
5263 int num_mem_regs = num_memories / 32 + ((num_memories % 32) ? 1 : 0);
5264
5265 if (block_address >= CFG_BASE)
5266 block_address -= CFG_BASE;
5267
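	/*
	 * Select the single-error or double-error indication bank (see the
	 * register table above) and dump one 32-bit register per group of
	 * 32 memory wrappers.
	 */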
5268 if (derr)
5269 block_address += GAUDI_ECC_DERR0_OFFSET;
5270 else
5271 block_address += GAUDI_ECC_SERR0_OFFSET;
5272
5273 if (disable_clock_gating) {
5274 mutex_lock(&gaudi->clk_gate_mutex);
5275 hdev->asic_funcs->disable_clock_gating(hdev);
5276 }
5277
5278 switch (num_mem_regs) {
5279 case 1:
5280 dev_err(hdev->dev,
5281 "%s ECC indication: 0x%08x\n",
5282 block_name, RREG32(block_address));
5283 break;
5284 case 2:
5285 dev_err(hdev->dev,
5286 "%s ECC indication: 0x%08x 0x%08x\n",
5287 block_name,
5288 RREG32(block_address), RREG32(block_address + 4));
5289 break;
5290 case 3:
5291 dev_err(hdev->dev,
5292 "%s ECC indication: 0x%08x 0x%08x 0x%08x\n",
5293 block_name,
5294 RREG32(block_address), RREG32(block_address + 4),
5295 RREG32(block_address + 8));
5296 break;
5297 case 4:
5298 dev_err(hdev->dev,
5299 "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n",
5300 block_name,
5301 RREG32(block_address), RREG32(block_address + 4),
5302 RREG32(block_address + 8), RREG32(block_address + 0xc));
5303 break;
5304 default:
5305 break;
5306
5307 }
5308
5309 if (disable_clock_gating) {
5310		hdev->asic_funcs->set_clock_gating(hdev);
5311		mutex_unlock(&gaudi->clk_gate_mutex);
5312 }
5313}
5314
5315static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5316 const char *qm_name,
5317 u64 glbl_sts_addr,
5318 u64 arb_err_addr)
5319{
5320 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5321 char reg_desc[32];
5322
5323 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5324 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5325 glbl_sts_clr_val = 0;
5326 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5327
5328 if (!glbl_sts_val)
5329 continue;
5330
5331 if (i == QMAN_STREAMS)
5332 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5333 else
5334 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5335
5336 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5337 if (glbl_sts_val & BIT(j)) {
5338 dev_err_ratelimited(hdev->dev,
5339 "%s %s. err cause: %s\n",
5340 qm_name, reg_desc,
5341 gaudi_qman_error_cause[j]);
5342 glbl_sts_clr_val |= BIT(j);
5343 }
5344 }
5345
5346		/* Write 1 to clear errors */
5347 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5348 }
5349
5350 arb_err_val = RREG32(arb_err_addr);
5351
5352 if (!arb_err_val)
5353 return;
5354
5355 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5356 if (arb_err_val & BIT(j)) {
5357 dev_err_ratelimited(hdev->dev,
5358 "%s ARB_ERR. err cause: %s\n",
5359 qm_name,
5360 gaudi_qman_arb_error_cause[j]);
5361 }
5362 }
5363}
5364
5365static void gaudi_print_ecc_info(struct hl_device *hdev, u16 event_type)
5366{
5367 u64 block_address;
5368 u8 index;
5369 int num_memories;
5370 char desc[32];
5371 bool derr;
5372 bool disable_clock_gating;
5373
5374 switch (event_type) {
5375 case GAUDI_EVENT_PCIE_CORE_SERR:
5376 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5377 block_address = mmPCIE_CORE_BASE;
5378 num_memories = 51;
5379 derr = false;
5380 disable_clock_gating = false;
5381 break;
5382 case GAUDI_EVENT_PCIE_CORE_DERR:
5383 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5384 block_address = mmPCIE_CORE_BASE;
5385 num_memories = 51;
5386 derr = true;
5387 disable_clock_gating = false;
5388 break;
5389 case GAUDI_EVENT_PCIE_IF_SERR:
5390 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5391 block_address = mmPCIE_WRAP_BASE;
5392 num_memories = 11;
5393 derr = false;
5394 disable_clock_gating = false;
5395 break;
5396 case GAUDI_EVENT_PCIE_IF_DERR:
5397 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5398 block_address = mmPCIE_WRAP_BASE;
5399 num_memories = 11;
5400 derr = true;
5401 disable_clock_gating = false;
5402 break;
5403 case GAUDI_EVENT_PCIE_PHY_SERR:
5404 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5405 block_address = mmPCIE_PHY_BASE;
5406 num_memories = 4;
5407 derr = false;
5408 disable_clock_gating = false;
5409 break;
5410 case GAUDI_EVENT_PCIE_PHY_DERR:
5411 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5412 block_address = mmPCIE_PHY_BASE;
5413 num_memories = 4;
5414 derr = true;
5415 disable_clock_gating = false;
5416 break;
5417 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5418 index = event_type - GAUDI_EVENT_TPC0_SERR;
5419 block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5420 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5421 num_memories = 90;
5422 derr = false;
5423 disable_clock_gating = true;
5424 break;
5425 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5426 index = event_type - GAUDI_EVENT_TPC0_DERR;
5427 block_address =
5428 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5429 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5430 num_memories = 90;
5431 derr = true;
5432 disable_clock_gating = true;
5433 break;
5434 case GAUDI_EVENT_MME0_ACC_SERR:
5435 case GAUDI_EVENT_MME1_ACC_SERR:
5436 case GAUDI_EVENT_MME2_ACC_SERR:
5437 case GAUDI_EVENT_MME3_ACC_SERR:
5438 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5439 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5440 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5441 num_memories = 128;
5442 derr = false;
5443 disable_clock_gating = true;
5444 break;
5445 case GAUDI_EVENT_MME0_ACC_DERR:
5446 case GAUDI_EVENT_MME1_ACC_DERR:
5447 case GAUDI_EVENT_MME2_ACC_DERR:
5448 case GAUDI_EVENT_MME3_ACC_DERR:
5449 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5450 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5451 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5452 num_memories = 128;
5453 derr = true;
5454 disable_clock_gating = true;
5455 break;
5456 case GAUDI_EVENT_MME0_SBAB_SERR:
5457 case GAUDI_EVENT_MME1_SBAB_SERR:
5458 case GAUDI_EVENT_MME2_SBAB_SERR:
5459 case GAUDI_EVENT_MME3_SBAB_SERR:
5460 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5461 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5462 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5463 num_memories = 33;
5464 derr = false;
5465 disable_clock_gating = true;
5466 break;
5467 case GAUDI_EVENT_MME0_SBAB_DERR:
5468 case GAUDI_EVENT_MME1_SBAB_DERR:
5469 case GAUDI_EVENT_MME2_SBAB_DERR:
5470 case GAUDI_EVENT_MME3_SBAB_DERR:
5471 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5472 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5473 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5474 num_memories = 33;
5475 derr = true;
5476 disable_clock_gating = true;
5477 break;
5478 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5479 index = event_type - GAUDI_EVENT_DMA0_SERR_ECC;
5480 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5481 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5482 num_memories = 16;
5483 derr = false;
5484 disable_clock_gating = false;
5485 break;
5486 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5487 index = event_type - GAUDI_EVENT_DMA0_DERR_ECC;
5488 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5489 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5490 num_memories = 16;
5491 derr = true;
5492 disable_clock_gating = false;
5493 break;
5494 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5495 block_address = mmCPU_IF_BASE;
5496 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5497 num_memories = 4;
5498 derr = false;
5499 disable_clock_gating = false;
5500 break;
5501 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5502 block_address = mmCPU_IF_BASE;
5503 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5504 num_memories = 4;
5505 derr = true;
5506 disable_clock_gating = false;
5507 break;
5508 case GAUDI_EVENT_PSOC_MEM_SERR:
5509 block_address = mmPSOC_GLOBAL_CONF_BASE;
5510 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5511 num_memories = 4;
5512 derr = false;
5513 disable_clock_gating = false;
5514 break;
5515 case GAUDI_EVENT_PSOC_MEM_DERR:
5516 block_address = mmPSOC_GLOBAL_CONF_BASE;
5517 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5518 num_memories = 4;
5519 derr = true;
5520 disable_clock_gating = false;
5521 break;
5522 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5523 block_address = mmPSOC_CS_TRACE_BASE;
5524 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5525 num_memories = 2;
5526 derr = false;
5527 disable_clock_gating = false;
5528 break;
5529 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5530 block_address = mmPSOC_CS_TRACE_BASE;
5531 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5532 num_memories = 2;
5533 derr = true;
5534 disable_clock_gating = false;
5535 break;
5536 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5537 index = event_type - GAUDI_EVENT_SRAM0_SERR;
5538 block_address =
5539 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5540 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5541 num_memories = 2;
5542 derr = false;
5543 disable_clock_gating = false;
5544 break;
5545 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5546 index = event_type - GAUDI_EVENT_SRAM0_DERR;
5547 block_address =
5548 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5549 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5550 num_memories = 2;
5551 derr = true;
5552 disable_clock_gating = false;
5553 break;
5554 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5555 index = event_type - GAUDI_EVENT_DMA_IF0_SERR;
5556 block_address = mmDMA_IF_W_S_BASE +
5557 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5558 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5559 num_memories = 60;
5560 derr = false;
5561 disable_clock_gating = false;
5562 break;
5563 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5564 index = event_type - GAUDI_EVENT_DMA_IF0_DERR;
5565 block_address = mmDMA_IF_W_S_BASE +
5566 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5567 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5568 derr = true;
5569 num_memories = 60;
5570 disable_clock_gating = false;
5571 break;
5572 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5573 index = event_type - GAUDI_EVENT_HBM_0_SERR;
5574 /* HBM Registers are at different offsets */
5575 block_address = mmHBM0_BASE + 0x8000 +
5576 index * (mmHBM1_BASE - mmHBM0_BASE);
5577 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5578 derr = false;
5579 num_memories = 64;
5580 disable_clock_gating = false;
5581 break;
5582 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5583		index = event_type - GAUDI_EVENT_HBM_0_DERR;
5584 /* HBM Registers are at different offsets */
5585 block_address = mmHBM0_BASE + 0x8000 +
5586 index * (mmHBM1_BASE - mmHBM0_BASE);
5587 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5588 derr = true;
5589 num_memories = 64;
5590 disable_clock_gating = false;
5591 break;
5592 default:
5593 return;
5594 }
5595
5596 gaudi_print_ecc_info_generic(hdev, desc, block_address, num_memories,
5597 derr, disable_clock_gating);
5598}
5599
5600static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5601{
5602 u64 glbl_sts_addr, arb_err_addr;
5603 u8 index;
5604 char desc[32];
5605
5606 switch (event_type) {
5607 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5608 index = event_type - GAUDI_EVENT_TPC0_QM;
5609 glbl_sts_addr =
5610 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5611 arb_err_addr =
5612 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5613 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5614 break;
5615 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5616 index = event_type - GAUDI_EVENT_MME0_QM;
5617 glbl_sts_addr =
5618 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5619 arb_err_addr =
5620 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5621 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5622 break;
5623 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5624 index = event_type - GAUDI_EVENT_DMA0_QM;
5625 glbl_sts_addr =
5626 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5627 arb_err_addr =
5628 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5629 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5630 break;
5631 default:
5632 return;
5633 }
5634
5635 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5636}
5637
5638static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5639 bool razwi)
5640{
5641	char desc[64] = "";
5642
5643 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5644 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5645 event_type, desc);
5646
5647 gaudi_print_ecc_info(hdev, event_type);
5648
5649 if (razwi) {
5650 gaudi_print_razwi_info(hdev);
5651 gaudi_print_mmu_error_info(hdev);
5652 }
5653}
5654
5655static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5656{
5657	struct gaudi_device *gaudi = hdev->asic_specific;
5658
5659	/* Unmask all IRQs since some could have been received
5660	 * during the soft reset
5661	 */
5662	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5663}
5664
5665static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5666{
5667 int ch, err = 0;
5668 u32 base, val, val2;
5669
5670 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
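	/*
	 * Each HBM channel exposes two pseudo-channels; their interrupt and
	 * ECC status registers live in the per-channel blocks below (0x1000
	 * stride) and are decoded and then cleared. The raw offsets used here
	 * follow the HBM controller layout assumed by this driver.
	 */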
5671 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5672 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5673 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5674 if (val) {
5675 err = 1;
5676 dev_err(hdev->dev,
5677 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5678 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5679 (val >> 2) & 0x1, (val >> 3) & 0x1,
5680 (val >> 4) & 0x1);
5681
5682 val2 = RREG32(base + ch * 0x1000 + 0x060);
5683 dev_err(hdev->dev,
5684 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5685 device, ch * 2,
5686 RREG32(base + ch * 0x1000 + 0x064),
5687 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5688 (val2 & 0xFF0000) >> 16,
5689 (val2 & 0xFF000000) >> 24);
5690 }
5691
5692 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5693 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5694 if (val) {
5695 err = 1;
5696 dev_err(hdev->dev,
5697 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5698 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5699 (val >> 2) & 0x1, (val >> 3) & 0x1,
5700 (val >> 4) & 0x1);
5701
5702 val2 = RREG32(base + ch * 0x1000 + 0x070);
5703 dev_err(hdev->dev,
5704 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5705 device, ch * 2 + 1,
5706 RREG32(base + ch * 0x1000 + 0x074),
5707 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5708 (val2 & 0xFF0000) >> 16,
5709 (val2 & 0xFF000000) >> 24);
5710 }
5711
5712 /* Clear interrupts */
5713 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5714 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5715 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5716 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5717 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5718 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5719 }
5720
5721 val = RREG32(base + 0x8F30);
5722 val2 = RREG32(base + 0x8F34);
5723 if (val | val2) {
5724 err = 1;
5725 dev_err(hdev->dev,
5726 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5727 device, val, val2);
5728 }
5729 val = RREG32(base + 0x8F40);
5730 val2 = RREG32(base + 0x8F44);
5731 if (val | val2) {
5732 err = 1;
5733 dev_err(hdev->dev,
5734 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5735 device, val, val2);
5736 }
5737
5738 return err;
5739}
5740
5741static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5742{
5743 switch (hbm_event_type) {
5744 case GAUDI_EVENT_HBM0_SPI_0:
5745 case GAUDI_EVENT_HBM0_SPI_1:
5746 return 0;
5747 case GAUDI_EVENT_HBM1_SPI_0:
5748 case GAUDI_EVENT_HBM1_SPI_1:
5749 return 1;
5750 case GAUDI_EVENT_HBM2_SPI_0:
5751 case GAUDI_EVENT_HBM2_SPI_1:
5752 return 2;
5753 case GAUDI_EVENT_HBM3_SPI_0:
5754 case GAUDI_EVENT_HBM3_SPI_1:
5755 return 3;
5756 default:
5757 break;
5758 }
5759
5760 /* Should never happen */
5761 return 0;
5762}
5763
5764static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5765 char *interrupt_name)
5766{
5767 struct gaudi_device *gaudi = hdev->asic_specific;
5768 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5769 bool soft_reset_required = false;
5770
5771	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5772	 * gating, and thus cannot be done by ArmCP; it is done by the driver
5773	 * instead.
5774 */
5775
5776 mutex_lock(&gaudi->clk_gate_mutex);
5777
5778 hdev->asic_funcs->disable_clock_gating(hdev);
5779
5780 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5781 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5782
5783 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5784 if (tpc_interrupts_cause & BIT(i)) {
5785 dev_err_ratelimited(hdev->dev,
5786 "TPC%d_%s interrupt cause: %s\n",
5787 tpc_id, interrupt_name,
5788 gaudi_tpc_interrupts_cause[i]);
5789 /* If this is QM error, we need to soft-reset */
5790 if (i == 15)
5791 soft_reset_required = true;
5792 }
5793
5794 /* Clear interrupts */
5795 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5796
5797	hdev->asic_funcs->set_clock_gating(hdev);
5798
5799 mutex_unlock(&gaudi->clk_gate_mutex);
5800
5801 return soft_reset_required;
5802}
5803
5804static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5805{
5806 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5807}
5808
5809static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5810{
5811 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5812}
5813
5814static void gaudi_print_clk_change_info(struct hl_device *hdev,
5815 u16 event_type)
5816{
5817 switch (event_type) {
5818 case GAUDI_EVENT_FIX_POWER_ENV_S:
5819 dev_info_ratelimited(hdev->dev,
5820 "Clock throttling due to power consumption\n");
5821 break;
5822
5823 case GAUDI_EVENT_FIX_POWER_ENV_E:
5824 dev_info_ratelimited(hdev->dev,
5825			"Power envelope is safe, back to optimal clock\n");
5826 break;
5827
5828 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5829 dev_info_ratelimited(hdev->dev,
5830 "Clock throttling due to overheating\n");
5831 break;
5832
5833 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5834 dev_info_ratelimited(hdev->dev,
5835			"Thermal envelope is safe, back to optimal clock\n");
5836 break;
5837
5838 default:
5839 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5840 event_type);
5841 break;
5842 }
5843}
5844
5845static void gaudi_handle_eqe(struct hl_device *hdev,
5846 struct hl_eq_entry *eq_entry)
5847{
5848 struct gaudi_device *gaudi = hdev->asic_specific;
5849 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5850 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5851 >> EQ_CTL_EVENT_TYPE_SHIFT);
5852 u8 cause;
5853	bool reset_required;
5854
5855 gaudi->events_stat[event_type]++;
5856 gaudi->events_stat_aggregate[event_type]++;
5857
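	/*
	 * Double-error and other fatal events trigger a hard reset (when
	 * hard_reset_on_fw_events is set); recoverable events are logged and
	 * their IRQ is unmasked back in the firmware via hl_fw_unmask_irq().
	 */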
5858 switch (event_type) {
5859 case GAUDI_EVENT_PCIE_CORE_DERR:
5860 case GAUDI_EVENT_PCIE_IF_DERR:
5861 case GAUDI_EVENT_PCIE_PHY_DERR:
5862 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5863 case GAUDI_EVENT_MME0_ACC_DERR:
5864 case GAUDI_EVENT_MME0_SBAB_DERR:
5865 case GAUDI_EVENT_MME1_ACC_DERR:
5866 case GAUDI_EVENT_MME1_SBAB_DERR:
5867 case GAUDI_EVENT_MME2_ACC_DERR:
5868 case GAUDI_EVENT_MME2_SBAB_DERR:
5869 case GAUDI_EVENT_MME3_ACC_DERR:
5870 case GAUDI_EVENT_MME3_SBAB_DERR:
5871 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5872 fallthrough;
5873 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5874 case GAUDI_EVENT_PSOC_MEM_DERR:
5875 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5876 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5877 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5878 fallthrough;
5879 case GAUDI_EVENT_GIC500:
5880 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5881 case GAUDI_EVENT_MMU_DERR:
5882 case GAUDI_EVENT_AXI_ECC:
5883 case GAUDI_EVENT_L2_RAM_ECC:
5884 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5885 gaudi_print_irq_info(hdev, event_type, false);
5886 if (hdev->hard_reset_on_fw_events)
5887 hl_device_reset(hdev, true, false);
5888 break;
5889
5890 case GAUDI_EVENT_HBM0_SPI_0:
5891 case GAUDI_EVENT_HBM1_SPI_0:
5892 case GAUDI_EVENT_HBM2_SPI_0:
5893 case GAUDI_EVENT_HBM3_SPI_0:
5894 gaudi_print_irq_info(hdev, event_type, false);
5895 gaudi_hbm_read_interrupts(hdev,
5896 gaudi_hbm_event_to_dev(event_type));
5897 if (hdev->hard_reset_on_fw_events)
5898 hl_device_reset(hdev, true, false);
5899 break;
5900
5901 case GAUDI_EVENT_HBM0_SPI_1:
5902 case GAUDI_EVENT_HBM1_SPI_1:
5903 case GAUDI_EVENT_HBM2_SPI_1:
5904 case GAUDI_EVENT_HBM3_SPI_1:
5905 gaudi_print_irq_info(hdev, event_type, false);
5906 gaudi_hbm_read_interrupts(hdev,
5907 gaudi_hbm_event_to_dev(event_type));
5908 break;
5909
5910 case GAUDI_EVENT_TPC0_DEC:
5911 case GAUDI_EVENT_TPC1_DEC:
5912 case GAUDI_EVENT_TPC2_DEC:
5913 case GAUDI_EVENT_TPC3_DEC:
5914 case GAUDI_EVENT_TPC4_DEC:
5915 case GAUDI_EVENT_TPC5_DEC:
5916 case GAUDI_EVENT_TPC6_DEC:
5917 case GAUDI_EVENT_TPC7_DEC:
5918 gaudi_print_irq_info(hdev, event_type, true);
5919		reset_required = gaudi_tpc_read_interrupts(hdev,
5920					tpc_dec_event_to_tpc_id(event_type),
5921					"AXI_SLV_DEC_Error");
5922		if (reset_required) {
5923			dev_err(hdev->dev, "hard reset required due to %s\n",
5924				gaudi_irq_map_table[event_type].name);
5925
5926			if (hdev->hard_reset_on_fw_events)
5927				hl_device_reset(hdev, true, false);
5928		} else {
5929			hl_fw_unmask_irq(hdev, event_type);
5930		}
5931		break;
5932
5933 case GAUDI_EVENT_TPC0_KRN_ERR:
5934 case GAUDI_EVENT_TPC1_KRN_ERR:
5935 case GAUDI_EVENT_TPC2_KRN_ERR:
5936 case GAUDI_EVENT_TPC3_KRN_ERR:
5937 case GAUDI_EVENT_TPC4_KRN_ERR:
5938 case GAUDI_EVENT_TPC5_KRN_ERR:
5939 case GAUDI_EVENT_TPC6_KRN_ERR:
5940 case GAUDI_EVENT_TPC7_KRN_ERR:
5941 gaudi_print_irq_info(hdev, event_type, true);
5942		reset_required = gaudi_tpc_read_interrupts(hdev,
5943					tpc_krn_event_to_tpc_id(event_type),
5944					"KRN_ERR");
5945		if (reset_required) {
5946			dev_err(hdev->dev, "hard reset required due to %s\n",
5947				gaudi_irq_map_table[event_type].name);
5948
5949			if (hdev->hard_reset_on_fw_events)
5950				hl_device_reset(hdev, true, false);
5951		} else {
5952			hl_fw_unmask_irq(hdev, event_type);
5953		}
5954		break;
5955
5956 case GAUDI_EVENT_PCIE_CORE_SERR:
5957 case GAUDI_EVENT_PCIE_IF_SERR:
5958 case GAUDI_EVENT_PCIE_PHY_SERR:
5959 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5960 case GAUDI_EVENT_MME0_ACC_SERR:
5961 case GAUDI_EVENT_MME0_SBAB_SERR:
5962 case GAUDI_EVENT_MME1_ACC_SERR:
5963 case GAUDI_EVENT_MME1_SBAB_SERR:
5964 case GAUDI_EVENT_MME2_ACC_SERR:
5965 case GAUDI_EVENT_MME2_SBAB_SERR:
5966 case GAUDI_EVENT_MME3_ACC_SERR:
5967 case GAUDI_EVENT_MME3_SBAB_SERR:
5968 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5969 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5970 case GAUDI_EVENT_PSOC_MEM_SERR:
5971 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5972 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5973 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5974 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5975 fallthrough;
5976 case GAUDI_EVENT_MMU_SERR:
5977 case GAUDI_EVENT_PCIE_DEC:
5978 case GAUDI_EVENT_MME0_WBC_RSP:
5979 case GAUDI_EVENT_MME0_SBAB0_RSP:
5980 case GAUDI_EVENT_MME1_WBC_RSP:
5981 case GAUDI_EVENT_MME1_SBAB0_RSP:
5982 case GAUDI_EVENT_MME2_WBC_RSP:
5983 case GAUDI_EVENT_MME2_SBAB0_RSP:
5984 case GAUDI_EVENT_MME3_WBC_RSP:
5985 case GAUDI_EVENT_MME3_SBAB0_RSP:
5986 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5987 case GAUDI_EVENT_PSOC_AXI_DEC:
5988 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5989 case GAUDI_EVENT_MMU_PAGE_FAULT:
5990 case GAUDI_EVENT_MMU_WR_PERM:
5991 case GAUDI_EVENT_RAZWI_OR_ADC:
5992 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5993 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5994 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5995 fallthrough;
5996 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5997 gaudi_print_irq_info(hdev, event_type, true);
5998 gaudi_handle_qman_err(hdev, event_type);
5999		hl_fw_unmask_irq(hdev, event_type);
6000		break;
6001
6002 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
6003 gaudi_print_irq_info(hdev, event_type, true);
6004		if (hdev->hard_reset_on_fw_events)
6005			hl_device_reset(hdev, true, false);
6006		break;
6007
6008 case GAUDI_EVENT_TPC0_BMON_SPMU:
6009 case GAUDI_EVENT_TPC1_BMON_SPMU:
6010 case GAUDI_EVENT_TPC2_BMON_SPMU:
6011 case GAUDI_EVENT_TPC3_BMON_SPMU:
6012 case GAUDI_EVENT_TPC4_BMON_SPMU:
6013 case GAUDI_EVENT_TPC5_BMON_SPMU:
6014 case GAUDI_EVENT_TPC6_BMON_SPMU:
6015 case GAUDI_EVENT_TPC7_BMON_SPMU:
6016 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
6017 gaudi_print_irq_info(hdev, event_type, false);
6018		hl_fw_unmask_irq(hdev, event_type);
6019		break;
6020
6021 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
6022 gaudi_print_clk_change_info(hdev, event_type);
6023		hl_fw_unmask_irq(hdev, event_type);
6024		break;
6025
6026 case GAUDI_EVENT_PSOC_GPIO_U16_0:
6027 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
6028 dev_err(hdev->dev,
6029 "Received high temp H/W interrupt %d (cause %d)\n",
6030 event_type, cause);
6031 break;
6032
6033 default:
6034 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
6035 event_type);
6036 break;
6037 }
6038}
6039
6040static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
6041 u32 *size)
6042{
6043 struct gaudi_device *gaudi = hdev->asic_specific;
6044
6045 if (aggregate) {
6046 *size = (u32) sizeof(gaudi->events_stat_aggregate);
6047 return gaudi->events_stat_aggregate;
6048 }
6049
6050 *size = (u32) sizeof(gaudi->events_stat);
6051 return gaudi->events_stat;
6052}
6053
6054static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
6055					u32 flags)
6056{
6057 struct gaudi_device *gaudi = hdev->asic_specific;
6058 u32 status, timeout_usec;
6059 int rc;
6060
6061 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
6062 hdev->hard_reset_pending)
6063		return 0;
6064
6065 if (hdev->pldm)
6066 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6067 else
6068 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6069
6070	mutex_lock(&hdev->mmu_cache_lock);
6071
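	/*
	 * Invalidation sequence, as implemented below: write the request to
	 * STLB_INV_PS, bump the cache-invalidation producer index, then poll
	 * STLB_INV_PS until it reads zero and finally clear STLB_INV_SET.
	 */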
6072	/* L0 & L1 invalidation */
6073	WREG32(mmSTLB_INV_PS, 3);
6074	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
6075	WREG32(mmSTLB_INV_PS, 2);
6076
6077	rc = hl_poll_timeout(
6078		hdev,
6079		mmSTLB_INV_PS,
6080		status,
6081		!status,
6082		1000,
6083		timeout_usec);
6084
6085	WREG32(mmSTLB_INV_SET, 0);
6086
6087	mutex_unlock(&hdev->mmu_cache_lock);
6088
6089 if (rc) {
6090 dev_err_ratelimited(hdev->dev,
6091 "MMU cache invalidation timeout\n");
6092 hl_device_reset(hdev, true, false);
6093 }
6094
6095 return rc;
6096}
6097
6098static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
6099				bool is_hard, u32 asid, u64 va, u64 size)
6100{
6101 struct gaudi_device *gaudi = hdev->asic_specific;
6102 u32 status, timeout_usec;
6103 u32 inv_data;
6104 u32 pi;
6105 int rc;
6106
6107 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
6108 hdev->hard_reset_pending)
6109		return 0;
6110
6111 mutex_lock(&hdev->mmu_cache_lock);
6112
6113 if (hdev->pldm)
6114 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6115 else
6116 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6117
6118 /*
6119 * TODO: currently invalidate entire L0 & L1 as in regular hard
6120 * invalidation. Need to apply invalidation of specific cache
6121 * lines with mask of ASID & VA & size.
6122	 * Note that L1 will be flushed entirely in any case.
6123 */
6124
6125 /* L0 & L1 invalidation */
6126 inv_data = RREG32(mmSTLB_CACHE_INV);
6127 /* PI is 8 bit */
6128 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6129 WREG32(mmSTLB_CACHE_INV,
6130 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6131
6132 rc = hl_poll_timeout(
6133 hdev,
6134 mmSTLB_INV_CONSUMER_INDEX,
6135 status,
6136 status == pi,
6137 1000,
6138 timeout_usec);
6139
6140	mutex_unlock(&hdev->mmu_cache_lock);
6141
6142 if (rc) {
6143 dev_err_ratelimited(hdev->dev,
6144 "MMU cache invalidation timeout\n");
6145 hl_device_reset(hdev, true, false);
6146 }
6147
6148 return rc;
6149}
6150
6151static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6152 u32 asid, u64 phys_addr)
6153{
6154 u32 status, timeout_usec;
6155 int rc;
6156
6157 if (hdev->pldm)
6158 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6159 else
6160 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6161
6162 WREG32(MMU_ASID, asid);
6163 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6164 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6165 WREG32(MMU_BUSY, 0x80000000);
6166
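	/* Bit 31 of MMU_BUSY kicks the hop0 update; poll until HW clears it */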
6167 rc = hl_poll_timeout(
6168 hdev,
6169 MMU_BUSY,
6170 status,
6171 !(status & 0x80000000),
6172 1000,
6173 timeout_usec);
6174
6175 if (rc) {
6176 dev_err(hdev->dev,
6177 "Timeout during MMU hop0 config of asid %d\n", asid);
6178 return rc;
6179 }
6180
6181 return 0;
6182}
6183
6184static int gaudi_send_heartbeat(struct hl_device *hdev)
6185{
6186 struct gaudi_device *gaudi = hdev->asic_specific;
6187
6188 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6189 return 0;
6190
6191 return hl_fw_send_heartbeat(hdev);
6192}
6193
6194static int gaudi_armcp_info_get(struct hl_device *hdev)
6195{
6196 struct gaudi_device *gaudi = hdev->asic_specific;
6197 struct asic_fixed_properties *prop = &hdev->asic_prop;
6198 int rc;
6199
6200 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6201 return 0;
6202
6203 rc = hl_fw_armcp_info_get(hdev);
6204 if (rc)
6205 return rc;
6206
6207 if (!strlen(prop->armcp_info.card_name))
6208 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6209 CARD_NAME_MAX_LEN);
6210
6211 return 0;
6212}
6213
6214static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
6215 struct seq_file *s)
6216{
6217 struct gaudi_device *gaudi = hdev->asic_specific;
6218 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6219 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6220 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6221 bool is_idle = true, is_eng_idle, is_slave;
6222 u64 offset;
6223 int i, dma_id;
6224
6225 mutex_lock(&gaudi->clk_gate_mutex);
6226
6227 hdev->asic_funcs->disable_clock_gating(hdev);
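	/*
	 * Clock gating is kept disabled (under clk_gate_mutex) while the
	 * QMAN/engine status registers are sampled; it is restored via
	 * set_clock_gating() before the mutex is released.
	 */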
6228
6229 if (s)
6230 seq_puts(s,
6231 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6232 "--- ------- ------------ ---------- -------------\n");
6233
6234 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6235 dma_id = gaudi_dma_assignment[i];
6236 offset = dma_id * DMA_QMAN_OFFSET;
6237
6238 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6239 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6240 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6241 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6242 IS_DMA_IDLE(dma_core_sts0);
6243 is_idle &= is_eng_idle;
6244
6245 if (mask)
6246 *mask |= !is_eng_idle <<
6247 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6248 if (s)
6249 seq_printf(s, fmt, dma_id,
6250 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6251 qm_cgm_sts, dma_core_sts0);
6252 }
6253
6254 if (s)
6255 seq_puts(s,
6256 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6257 "--- ------- ------------ ---------- ----------\n");
6258
6259 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6260 offset = i * TPC_QMAN_OFFSET;
6261 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6262 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6263 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6264 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6265 IS_TPC_IDLE(tpc_cfg_sts);
6266 is_idle &= is_eng_idle;
6267
6268 if (mask)
6269 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
6270 if (s)
6271 seq_printf(s, fmt, i,
6272 is_eng_idle ? "Y" : "N",
6273 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6274 }
6275
6276 if (s)
6277 seq_puts(s,
6278 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6279 "--- ------- ------------ ---------- -----------\n");
6280
6281 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6282 offset = i * MME_QMAN_OFFSET;
6283 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6284 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6285
6286 /* MME 1 & 3 are slaves, no need to check their QMANs */
6287 is_slave = i % 2;
6288 if (!is_slave) {
6289 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6290 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6291 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6292 }
6293
6294 is_idle &= is_eng_idle;
6295
6296 if (mask)
6297 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
6298 if (s) {
6299 if (!is_slave)
6300 seq_printf(s, fmt, i,
6301 is_eng_idle ? "Y" : "N",
6302 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6303 else
6304 seq_printf(s, mme_slave_fmt, i,
6305 is_eng_idle ? "Y" : "N", "-",
6306 "-", mme_arch_sts);
6307 }
6308 }
6309
6310 if (s)
6311 seq_puts(s, "\n");
6312
6313	hdev->asic_funcs->set_clock_gating(hdev);
6314
6315 mutex_unlock(&gaudi->clk_gate_mutex);
6316
6317 return is_idle;
6318}
6319
6320static void gaudi_hw_queues_lock(struct hl_device *hdev)
6321 __acquires(&gaudi->hw_queues_lock)
6322{
6323 struct gaudi_device *gaudi = hdev->asic_specific;
6324
6325 spin_lock(&gaudi->hw_queues_lock);
6326}
6327
6328static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6329 __releases(&gaudi->hw_queues_lock)
6330{
6331 struct gaudi_device *gaudi = hdev->asic_specific;
6332
6333 spin_unlock(&gaudi->hw_queues_lock);
6334}
6335
6336static u32 gaudi_get_pci_id(struct hl_device *hdev)
6337{
6338 return hdev->pdev->device;
6339}
6340
6341static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6342 size_t max_size)
6343{
6344 struct gaudi_device *gaudi = hdev->asic_specific;
6345
6346 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6347 return 0;
6348
6349 return hl_fw_get_eeprom_data(hdev, data, max_size);
6350}
6351
6352/*
6353 * this function should be used only during initialization and/or after reset,
6354 * when there are no active users.
6355 */
6356static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6357 u32 tpc_id)
6358{
6359 struct gaudi_device *gaudi = hdev->asic_specific;
6360 u64 kernel_timeout;
6361 u32 status, offset;
6362 int rc;
6363
6364 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6365
6366 if (hdev->pldm)
6367 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6368 else
6369 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6370
6371 mutex_lock(&gaudi->clk_gate_mutex);
6372
6373 hdev->asic_funcs->disable_clock_gating(hdev);
6374
6375 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6376 lower_32_bits(tpc_kernel));
6377 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6378 upper_32_bits(tpc_kernel));
6379
6380 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6381 lower_32_bits(tpc_kernel));
6382 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6383 upper_32_bits(tpc_kernel));
6384 /* set a valid LUT pointer, content is of no significance */
6385 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6386 lower_32_bits(tpc_kernel));
6387 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6388 upper_32_bits(tpc_kernel));
6389
6390 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6391 lower_32_bits(CFG_BASE +
6392 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6393
6394 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6395 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6396 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6397 /* wait a bit for the engine to start executing */
6398 usleep_range(1000, 1500);
6399
6400 /* wait until engine has finished executing */
6401 rc = hl_poll_timeout(
6402 hdev,
6403 mmTPC0_CFG_STATUS + offset,
6404 status,
6405 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6406 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6407 1000,
6408 kernel_timeout);
6409
6410 if (rc) {
6411 dev_err(hdev->dev,
6412 "Timeout while waiting for TPC%d icache prefetch\n",
6413 tpc_id);
6414		hdev->asic_funcs->set_clock_gating(hdev);
6415		mutex_unlock(&gaudi->clk_gate_mutex);
6416 return -EIO;
6417 }
6418
6419 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6420 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6421
6422 /* wait a bit for the engine to start executing */
6423 usleep_range(1000, 1500);
6424
6425 /* wait until engine has finished executing */
6426 rc = hl_poll_timeout(
6427 hdev,
6428 mmTPC0_CFG_STATUS + offset,
6429 status,
6430 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6431 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6432 1000,
6433 kernel_timeout);
6434
6435 rc = hl_poll_timeout(
6436 hdev,
6437 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6438 status,
6439 (status == 0),
6440 1000,
6441 kernel_timeout);
6442
6443	hdev->asic_funcs->set_clock_gating(hdev);
6444	mutex_unlock(&gaudi->clk_gate_mutex);
6445
6446 if (rc) {
6447 dev_err(hdev->dev,
6448 "Timeout while waiting for TPC%d kernel to execute\n",
6449 tpc_id);
6450 return -EIO;
6451 }
6452
6453 return 0;
6454}
6455
6456static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6457{
6458 return RREG32(mmHW_STATE);
6459}
6460
6461static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6462{
6463 return gaudi_cq_assignment[cq_idx];
6464}
6465
6466static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
6467{
6468 struct gaudi_device *gaudi = hdev->asic_specific;
6469 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
6470 struct hl_hw_sob *hw_sob;
6471 int sob, ext_idx = gaudi->ext_queue_idx++;
6472
6473 /*
6474 * The external queues might not sit sequentially, hence use the
6475 * real external queue index for the SOB/MON base id.
6476 */
6477 hw_queue->base_sob_id = ext_idx * HL_RSVD_SOBS;
6478 hw_queue->base_mon_id = ext_idx * HL_RSVD_MONS;
6479 hw_queue->next_sob_val = 1;
6480 hw_queue->curr_sob_offset = 0;
6481
6482 for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
6483 hw_sob = &hw_queue->hw_sob[sob];
6484 hw_sob->hdev = hdev;
6485 hw_sob->sob_id = hw_queue->base_sob_id + sob;
6486 hw_sob->q_idx = q_idx;
6487 kref_init(&hw_sob->kref);
6488 }
6489}
6490
6491static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
6492{
6493 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
6494
6495 /*
6496 * In case we got here due to a stuck CS, the refcnt might be bigger
6497 * than 1 and therefore we reset it.
6498 */
6499 kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
6500 hw_queue->curr_sob_offset = 0;
6501 hw_queue->next_sob_val = 1;
6502}
6503
6504static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6505{
6506 return sizeof(struct packet_msg_short) +
6507 sizeof(struct packet_msg_prot) * 2;
6508}
6509
6510static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6511{
6512 return sizeof(struct packet_msg_short) * 4 +
6513 sizeof(struct packet_fence) +
6514 sizeof(struct packet_msg_prot) * 2;
6515}
6516
6517static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6518{
6519 struct hl_cb *cb = (struct hl_cb *) data;
6520 struct packet_msg_short *pkt;
6521 u32 value, ctl;
6522
6523 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6524 memset(pkt, 0, sizeof(*pkt));
6525
6526 value = 1 << GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT; /* inc by 1 */
6527 value |= 1 << GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT; /* add mode */
6528
6529 ctl = (sob_id * 4) << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; /* SOB id */
6530 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6531 ctl |= 3 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S SOB base */
6532 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6533 ctl |= 1 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6534 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6535 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6536
6537 pkt->value = cpu_to_le32(value);
6538 pkt->ctl = cpu_to_le32(ctl);
6539}
6540
6541static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6542 u16 addr)
6543{
6544 u32 ctl, pkt_size = sizeof(*pkt);
6545
6546 memset(pkt, 0, pkt_size);
6547
6548 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6549 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6550 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6551 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6552 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6553 ctl |= 0 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; /* only last pkt needs MB */
6554
6555 pkt->value = cpu_to_le32(value);
6556 pkt->ctl = cpu_to_le32(ctl);
6557
6558 return pkt_size;
6559}
6560
6561static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6562 u16 sob_val, u16 addr)
6563{
6564 u32 ctl, value, pkt_size = sizeof(*pkt);
6565 u8 mask = ~(1 << (sob_id & 0x7));
6566
6567 memset(pkt, 0, pkt_size);
6568
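	/*
	 * A monitor watches a group of 8 sync objects: sob_id / 8 selects the
	 * group and the mask singles out the requested SOB within it (only
	 * its bit is cleared), so that the GREATER_OR_EQUAL comparison below
	 * effectively applies to that SOB.
	 */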
6569 value = (sob_id / 8) << GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_SHIFT;
6570 value |= sob_val << GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_SHIFT;
6571 value |= 0 << GAUDI_PKT_SHORT_VAL_MON_MODE_SHIFT; /* GREATER_OR_EQUAL */
6572 value |= mask << GAUDI_PKT_SHORT_VAL_MON_MASK_SHIFT;
6573
6574 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6575 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6576 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6577 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6578 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6579 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6580 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6581
6582 pkt->value = cpu_to_le32(value);
6583 pkt->ctl = cpu_to_le32(ctl);
6584
6585 return pkt_size;
6586}
6587
6588static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6589{
6590 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6591
6592 memset(pkt, 0, pkt_size);
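	/*
	 * Arm fence ID 2 to wait for a single completion: decrement by 1 with
	 * a target value of 1. This pairs with the CP_FENCE2_RDATA registers
	 * used as the monitor payload address in gaudi_gen_wait_cb().
	 */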
6593
6594 cfg = 1 << GAUDI_PKT_FENCE_CFG_DEC_VAL_SHIFT;
6595 cfg |= 1 << GAUDI_PKT_FENCE_CFG_TARGET_VAL_SHIFT;
6596 cfg |= 2 << GAUDI_PKT_FENCE_CFG_ID_SHIFT;
6597
6598 ctl = 0 << GAUDI_PKT_FENCE_CTL_PRED_SHIFT;
6599 ctl |= PACKET_FENCE << GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT;
6600 ctl |= 0 << GAUDI_PKT_FENCE_CTL_EB_SHIFT;
6601 ctl |= 1 << GAUDI_PKT_FENCE_CTL_RB_SHIFT;
6602 ctl |= 1 << GAUDI_PKT_FENCE_CTL_MB_SHIFT;
6603
6604 pkt->cfg = cpu_to_le32(cfg);
6605 pkt->ctl = cpu_to_le32(ctl);
6606
6607 return pkt_size;
6608}
6609
6610static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6611 u16 sob_val, u16 mon_id, u32 q_idx)
6612{
6613 struct hl_cb *cb = (struct hl_cb *) data;
6614 void *buf = (void *) (uintptr_t) cb->kernel_address;
6615 u64 monitor_base, fence_addr = 0;
6616 u32 size = 0;
6617 u16 msg_addr_offset;
6618
6619 switch (q_idx) {
6620 case GAUDI_QUEUE_ID_DMA_0_0:
6621 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6622 break;
6623 case GAUDI_QUEUE_ID_DMA_0_1:
6624 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6625 break;
6626 case GAUDI_QUEUE_ID_DMA_0_2:
6627 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6628 break;
6629 case GAUDI_QUEUE_ID_DMA_0_3:
6630 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6631 break;
6632 case GAUDI_QUEUE_ID_DMA_1_0:
6633 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6634 break;
6635 case GAUDI_QUEUE_ID_DMA_1_1:
6636 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6637 break;
6638 case GAUDI_QUEUE_ID_DMA_1_2:
6639 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6640 break;
6641 case GAUDI_QUEUE_ID_DMA_1_3:
6642 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6643 break;
6644 case GAUDI_QUEUE_ID_DMA_5_0:
6645 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6646 break;
6647 case GAUDI_QUEUE_ID_DMA_5_1:
6648 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6649 break;
6650 case GAUDI_QUEUE_ID_DMA_5_2:
6651 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6652 break;
6653 case GAUDI_QUEUE_ID_DMA_5_3:
6654 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6655 break;
6656 default:
6657 /* queue index should be valid here */
6658 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6659 q_idx);
6660 return;
6661 }
6662
6663 fence_addr += CFG_BASE;
6664
6665 /*
6666 * monitor_base should be the content of the base0 address registers,
6667 * so it will be added to the msg short offsets
6668 */
6669 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6670
6671 /* First monitor config packet: low address of the sync */
6672 msg_addr_offset =
6673 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6674 monitor_base;
6675
6676 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6677 msg_addr_offset);
6678
6679 /* Second monitor config packet: high address of the sync */
6680 msg_addr_offset =
6681 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6682 monitor_base;
6683
6684 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6685 msg_addr_offset);
6686
6687 /*
6688 * Third monitor config packet: the payload, i.e. what to write when the
6689 * sync triggers
6690 */
6691 msg_addr_offset =
6692 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6693 monitor_base;
6694
6695 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6696
6697 /* Fourth monitor config packet: bind the monitor to a sync object */
6698 msg_addr_offset =
6699 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6700 monitor_base;
6701 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6702 msg_addr_offset);
6703
6704 /* Fence packet */
6705 size += gaudi_add_fence_pkt(buf + size);
6706}
6707
6708static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6709{
6710 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6711
6712 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6713 hw_sob->sob_id);
6714
6715 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6716 0);
6717
6718 kref_init(&hw_sob->kref);
6719}
6720
6721static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6722{
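	/*
	 * HL_POWER9_HOST_MAGIC in the non-reset scratch-pad register marks a
	 * POWER9 host, on which the full 64-bit DMA mask is used; otherwise
	 * fall back to a 48-bit mask.
	 */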
6723 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6724 HL_POWER9_HOST_MAGIC) {
6725 hdev->power9_64bit_dma_enable = 1;
6726 hdev->dma_mask = 64;
6727 } else {
6728 hdev->power9_64bit_dma_enable = 0;
6729 hdev->dma_mask = 48;
6730 }
6731}
6732
6733static u64 gaudi_get_device_time(struct hl_device *hdev)
6734{
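	/* Compose the 64-bit timestamp from the upper and lower counter halves */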
6735 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6736
6737 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6738}
6739
6740static const struct hl_asic_funcs gaudi_funcs = {
6741 .early_init = gaudi_early_init,
6742 .early_fini = gaudi_early_fini,
6743 .late_init = gaudi_late_init,
6744 .late_fini = gaudi_late_fini,
6745 .sw_init = gaudi_sw_init,
6746 .sw_fini = gaudi_sw_fini,
6747 .hw_init = gaudi_hw_init,
6748 .hw_fini = gaudi_hw_fini,
6749 .halt_engines = gaudi_halt_engines,
6750 .suspend = gaudi_suspend,
6751 .resume = gaudi_resume,
6752 .cb_mmap = gaudi_cb_mmap,
6753 .ring_doorbell = gaudi_ring_doorbell,
6754 .pqe_write = gaudi_pqe_write,
6755 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6756 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6757 .get_int_queue_base = gaudi_get_int_queue_base,
6758 .test_queues = gaudi_test_queues,
6759 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6760 .asic_dma_pool_free = gaudi_dma_pool_free,
6761 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6762 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6763 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6764 .cs_parser = gaudi_cs_parser,
6765 .asic_dma_map_sg = gaudi_dma_map_sg,
6766 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6767 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6768 .update_eq_ci = gaudi_update_eq_ci,
6769 .context_switch = gaudi_context_switch,
6770 .restore_phase_topology = gaudi_restore_phase_topology,
6771 .debugfs_read32 = gaudi_debugfs_read32,
6772 .debugfs_write32 = gaudi_debugfs_write32,
6773 .debugfs_read64 = gaudi_debugfs_read64,
6774 .debugfs_write64 = gaudi_debugfs_write64,
6775	.add_device_attr = gaudi_add_device_attr,
6776	.handle_eqe = gaudi_handle_eqe,
6777	.set_pll_profile = gaudi_set_pll_profile,
6778	.get_events_stat = gaudi_get_events_stat,
6779 .read_pte = gaudi_read_pte,
6780 .write_pte = gaudi_write_pte,
6781 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6782 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6783 .send_heartbeat = gaudi_send_heartbeat,
6784	.set_clock_gating = gaudi_set_clock_gating,
6785	.disable_clock_gating = gaudi_disable_clock_gating,
6786	.debug_coresight = gaudi_debug_coresight,
6787	.is_device_idle = gaudi_is_device_idle,
6788 .soft_reset_late_init = gaudi_soft_reset_late_init,
6789 .hw_queues_lock = gaudi_hw_queues_lock,
6790 .hw_queues_unlock = gaudi_hw_queues_unlock,
6791 .get_pci_id = gaudi_get_pci_id,
6792 .get_eeprom_data = gaudi_get_eeprom_data,
6793 .send_cpu_message = gaudi_send_cpu_message,
6794 .get_hw_state = gaudi_get_hw_state,
6795 .pci_bars_map = gaudi_pci_bars_map,
6796 .set_dram_bar_base = gaudi_set_hbm_bar_base,
6797 .init_iatu = gaudi_init_iatu,
6798 .rreg = hl_rreg,
6799 .wreg = hl_wreg,
6800	.halt_coresight = gaudi_halt_coresight,
6801	.get_clk_rate = gaudi_get_clk_rate,
6802	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6803 .read_device_fw_version = gaudi_read_device_fw_version,
6804 .load_firmware_to_device = gaudi_load_firmware_to_device,
6805 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6806 .ext_queue_init = gaudi_ext_queue_init,
6807 .ext_queue_reset = gaudi_ext_queue_reset,
6808 .get_signal_cb_size = gaudi_get_signal_cb_size,
6809 .get_wait_cb_size = gaudi_get_wait_cb_size,
6810 .gen_signal_cb = gaudi_gen_signal_cb,
6811 .gen_wait_cb = gaudi_gen_wait_cb,
6812 .reset_sob = gaudi_reset_sob,
6813 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6814 .get_device_time = gaudi_get_device_time
6815};
6816
6817/**
6818 * gaudi_set_asic_funcs - set GAUDI function pointers
6819 *
6820 * @hdev: pointer to hl_device structure
6821 *
6822 */
6823void gaudi_set_asic_funcs(struct hl_device *hdev)
6824{
6825 hdev->asic_funcs = &gaudi_funcs;
6826}