// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: the driver needs to parse the CB, but WREG should be
 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
 *                      never secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000	/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1	/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200	/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000	/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000	/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000	/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000	/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000	/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000	/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

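/*
 * Engines whose clock gating can be toggled through debugfs, per the mask
 * below: MME 0, MME 2 and all eight TPC engines.
 */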
#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

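/*
 * Logical DMA channel to physical DMA engine mapping: channels 0-1 are the
 * PCI (host) DMA engines used by the driver, channels 2-7 are the HBM DMA
 * engines handed to compute work (see the security scheme comment above).
 */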
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

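/*
 * Completion queues are backed only by the external PCI DMA queues
 * (DMA 0/1, four streams each), which are the queues that report
 * completions back to the host.
 */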
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

enum gaudi_sm_sei_cause {
	GAUDI_SM_SEI_SO_OVERFLOW,
	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
	GAUDI_SM_SEI_AXI_RESPONSE_ERR
};

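/*
 * Queue type of every hardware queue ID: the PCI DMA queues are external
 * (driver-managed, with host completions), CPU_PQ is the driver's queue
 * towards the device CPU, and all other DMA/MME/TPC/NIC queues are internal.
 */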
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
				struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
				u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
				u32 num_regs, u32 val);
static int gaudi_schedule_register_memset(struct hl_device *hdev,
		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

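/*
 * Collective-wait role of a queue: external queues act as the collective
 * master, while DMA5, TPC7 and NIC queues serve as collective slaves; all
 * other queues take no part in collective operations.
 */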
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
		prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective.
	 * 2 monitors per external queue stream are reserved for collective.
	 */
560 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
561 (NUMBER_OF_EXT_HW_QUEUES * 2);
562
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300563 prop->dram_base_address = DRAM_PHYS_BASE;
564 prop->dram_size = GAUDI_HBM_SIZE_32GB;
565 prop->dram_end_address = prop->dram_base_address +
566 prop->dram_size;
567 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
568
569 prop->sram_base_address = SRAM_BASE_ADDR;
570 prop->sram_size = SRAM_SIZE;
571 prop->sram_end_address = prop->sram_base_address +
572 prop->sram_size;
573 prop->sram_user_base_address = prop->sram_base_address +
574 SRAM_USER_BASE_OFFSET;
575
576 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
577 if (hdev->pldm)
578 prop->mmu_pgt_size = 0x800000; /* 8MB */
579 else
580 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
581 prop->mmu_pte_size = HL_PTE_SIZE;
582 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
583 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
584 prop->dram_page_size = PAGE_SIZE_2MB;
Oded Gabbay7f070c92020-11-09 09:48:31 +0200585 prop->dram_supports_virtual_memory = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300586
587 prop->pmmu.hop0_shift = HOP0_SHIFT;
588 prop->pmmu.hop1_shift = HOP1_SHIFT;
589 prop->pmmu.hop2_shift = HOP2_SHIFT;
590 prop->pmmu.hop3_shift = HOP3_SHIFT;
591 prop->pmmu.hop4_shift = HOP4_SHIFT;
592 prop->pmmu.hop0_mask = HOP0_MASK;
593 prop->pmmu.hop1_mask = HOP1_MASK;
594 prop->pmmu.hop2_mask = HOP2_MASK;
595 prop->pmmu.hop3_mask = HOP3_MASK;
596 prop->pmmu.hop4_mask = HOP4_MASK;
597 prop->pmmu.start_addr = VA_HOST_SPACE_START;
598 prop->pmmu.end_addr =
599 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
600 prop->pmmu.page_size = PAGE_SIZE_4KB;
Moti Haimovski7edf3412020-08-23 13:23:13 +0300601 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300602
	/* PMMU and HPMMU are the same except for page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_msix_interrupt = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

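	/*
	 * The CFG BAR is mapped starting at the SPI flash base, so offset the
	 * BAR pointer by the distance from SPI_FLASH_BASE_ADDR to CFG_BASE to
	 * obtain a pointer to the configuration space.
	 */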
	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

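/*
 * Re-point the HBM BAR (inbound iATU region 2, BAR-match mode) at a new
 * device address and return the previous base so the caller can restore it.
 * Returns U64_MAX if the region cannot be changed, e.g. when the iATU is
 * owned by the firmware.
 */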
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * The GIC security bit can ONLY be set by CPUCP, so at this
		 * stage the decision can only be taken based on PCI ID
		 * security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
					mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1,
					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

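		/*
		 * PLL output frequency is ref_clk * (NF + 1) / ((NR + 1) * (OD + 1)),
		 * optionally divided further by (div_fctr + 1) when a divided
		 * output is selected, as computed below.
		 */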
		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

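	/*
	 * Build the LIN_DMA control word: LIN_DMA opcode, linear addressing
	 * mode, and the RB/MB (register/message barrier) bits set so the copy
	 * is ordered with respect to the packets that follow it.
	 */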
	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

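	/*
	 * Each NIC engine exposes four queue streams, so stepping by four
	 * selects the same stream on every NIC engine; each one gets its own
	 * SOB from the group.
	 */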
	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	u64 base_addr;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob_group->base_sob_id * 4;
	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
	if (rc)
		dev_err(hdev->dev,
			"failed resetting sob group - sob base %u, count %u",
			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	/* Check again if the signal CS has already completed.
	 * If yes, don't send any wait CS since the hw_sob could already be
	 * in reset. If the signal has not completed, take a refcount on the
	 * hw_sob to prevent it from being reset while the wait CS is not yet
	 * submitted.
	 * Note that this check is protected by two locks: the hw_queue lock
	 * and the completion object lock. The same completion object lock
	 * also protects the hw_sob reset handler function. The hw_queue lock
	 * prevents the hw_sob refcount value, which is changed by the
	 * signal/wait flows, from going out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

1399 /*
1400	 * No need for parsing - the user CB is already the patched CB.
1401	 * We call hl_cb_destroy() for two reasons: we don't need
1402	 * the CB in the CB idr anymore, and we must decrement its refcount
1403	 * as it was incremented inside hl_cb_kernel_create().
1404 */
1405 if (patched_cb)
1406 job->patched_cb = job->user_cb;
1407 else
1408 job->patched_cb = NULL;
1409
1410 job->job_cb_size = job->user_cb_size;
1411 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1412
1413	/* increment refcount since we get a completion for external queues */
1414 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1415 cs_get(cs);
1416
1417 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1418
1419 list_add_tail(&job->cs_node, &cs->job_list);
1420
1421 hl_debugfs_add_job(hdev, job);
1422
1423 return 0;
Ofir Bitton5fe1c172020-09-10 10:10:55 +03001424}
1425
1426static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1427 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1428 u32 collective_engine_id)
1429{
Ofir Bitton5de406c2020-09-10 10:56:26 +03001430 struct gaudi_device *gaudi = hdev->asic_specific;
1431 struct hw_queue_properties *hw_queue_prop;
1432 u32 queue_id, collective_queue, num_jobs;
1433 u32 stream, nic_queue, nic_idx = 0;
1434 bool skip;
Ofir Bitton266cdfa2020-12-22 15:56:12 +02001435 int i, rc = 0;
Ofir Bitton5de406c2020-09-10 10:56:26 +03001436
1437 /* Verify wait queue id is configured as master */
1438 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1439 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1440 dev_err(hdev->dev,
1441 "Queue %d is not configured as collective master\n",
1442 wait_queue_id);
1443 return -EINVAL;
1444 }
1445
1446 /* Verify engine id is supported */
1447 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1448 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1449 dev_err(hdev->dev,
1450 "Collective wait does not support engine %u\n",
1451 collective_engine_id);
1452 return -EINVAL;
1453 }
1454
1455 stream = wait_queue_id % 4;
1456
1457 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1458 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
Ofir Bitton5de406c2020-09-10 10:56:26 +03001459 else
Ofir Bitton71a984f2020-10-19 16:52:00 +03001460 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
Ofir Bitton5de406c2020-09-10 10:56:26 +03001461
1462 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1463 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1464
1465	/* The first job goes to the collective master queue; it will wait for
1466 * the collective slave queues to finish execution.
1467 * The synchronization is done using two monitors:
1468 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1469 * reduction engine (DMA5/TPC7).
1470 *
1471	 * The rest of the jobs go to the collective slave queues, which will
1472 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1473 */
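	/*
	 * In the loop below, job 0 is therefore created on the collective
	 * master queue, the following jobs are created on the NIC slave queues
	 * of the same stream (NICs that are not initialized are skipped), and
	 * any remaining job is created on the reduction engine queue.
	 */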
1474 for (i = 0 ; i < num_jobs ; i++) {
1475 if (i == 0) {
1476 queue_id = wait_queue_id;
1477 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1478 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1479 } else {
1480 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1481 if (gaudi->hw_cap_initialized &
1482 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1483 skip = false;
1484 else
1485 skip = true;
1486
1487 queue_id = nic_queue;
1488 nic_queue += 4;
1489 nic_idx++;
1490
1491 if (skip)
1492 continue;
1493 } else {
1494 queue_id = collective_queue;
1495 }
1496
1497 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1498 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1499 }
1500
1501 if (rc)
1502 return rc;
1503 }
1504
1505 return rc;
Ofir Bitton5fe1c172020-09-10 10:10:55 +03001506}
1507
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001508static int gaudi_late_init(struct hl_device *hdev)
1509{
1510 struct gaudi_device *gaudi = hdev->asic_specific;
1511 int rc;
1512
Oded Gabbay2f553422020-08-15 16:28:10 +03001513 rc = gaudi->cpucp_info_get(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001514 if (rc) {
Oded Gabbay2f553422020-08-15 16:28:10 +03001515 dev_err(hdev->dev, "Failed to get cpucp info\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001516 return rc;
1517 }
1518
Oded Gabbay3c681572020-11-02 21:10:39 +02001519 if ((hdev->card_type == cpucp_card_type_pci) &&
1520 (hdev->nic_ports_mask & 0x3)) {
1521 dev_info(hdev->dev,
1522 "PCI card detected, only 8 ports are enabled\n");
1523 hdev->nic_ports_mask &= ~0x3;
1524
1525 /* Stop and disable unused NIC QMANs */
1526 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1527 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1528 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1529
1530 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1531 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1532 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1533
1534 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1535 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1536
1537 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1538 }
1539
Oded Gabbay2f553422020-08-15 16:28:10 +03001540 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001541 if (rc) {
1542 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1543 return rc;
1544 }
1545
Ofir Bitton1cbca892020-10-05 11:36:00 +03001546 rc = gaudi_fetch_psoc_frequency(hdev);
1547 if (rc) {
1548 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1549 goto disable_pci_access;
1550 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001551
1552 rc = gaudi_mmu_clear_pgt_range(hdev);
1553 if (rc) {
1554 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1555 goto disable_pci_access;
1556 }
1557
1558 rc = gaudi_init_tpc_mem(hdev);
1559 if (rc) {
1560 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1561 goto disable_pci_access;
1562 }
1563
Ofir Bitton5de406c2020-09-10 10:56:26 +03001564 rc = gaudi_collective_init(hdev);
1565 if (rc) {
1566 dev_err(hdev->dev, "Failed to init collective\n");
1567 goto disable_pci_access;
1568 }
1569
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001570 return 0;
1571
1572disable_pci_access:
Oded Gabbay2f553422020-08-15 16:28:10 +03001573 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001574
1575 return rc;
1576}
1577
1578static void gaudi_late_fini(struct hl_device *hdev)
1579{
1580 const struct hwmon_channel_info **channel_info_arr;
1581 int i = 0;
1582
1583 if (!hdev->hl_chip_info->info)
1584 return;
1585
1586 channel_info_arr = hdev->hl_chip_info->info;
1587
1588 while (channel_info_arr[i]) {
1589 kfree(channel_info_arr[i]->config);
1590 kfree(channel_info_arr[i]);
1591 i++;
1592 }
1593
1594 kfree(channel_info_arr);
1595
1596 hdev->hl_chip_info->info = NULL;
1597}
1598
1599static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1600{
1601 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1602 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1603 int i, j, rc = 0;
1604
1605 /*
1606	 * The device CPU works with 40-bit addresses, and bit 39 must be set
1607 * to '1' when accessing the host.
1608 * Bits 49:39 of the full host address are saved for a later
1609 * configuration of the HW to perform extension to 50 bits.
1610 * Because there is a single HW register that holds the extension bits,
1611	 * these bits must be identical across the entire allocated range.
1612 */
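	/*
	 * The retry loop below allocates up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT
	 * candidate buffers and keeps the first one whose start and end
	 * addresses share the same PCI MSBs, i.e.
	 * GAUDI_CPU_PCI_MSB_ADDR(addr) == GAUDI_CPU_PCI_MSB_ADDR(addr + size - 1).
	 */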
1613
1614 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1615 virt_addr_arr[i] =
1616 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1617 HL_CPU_ACCESSIBLE_MEM_SIZE,
1618 &dma_addr_arr[i],
1619 GFP_KERNEL | __GFP_ZERO);
1620 if (!virt_addr_arr[i]) {
1621 rc = -ENOMEM;
1622 goto free_dma_mem_arr;
1623 }
1624
1625 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1626 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1627 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1628 break;
1629 }
1630
1631 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1632 dev_err(hdev->dev,
1633 "MSB of CPU accessible DMA memory are not identical in all range\n");
1634 rc = -EFAULT;
1635 goto free_dma_mem_arr;
1636 }
1637
1638 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1639 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1640 hdev->cpu_pci_msb_addr =
1641 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1642
Ohad Sharabi4cb45082021-05-20 09:09:03 +03001643 if (!hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03001644 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001645
1646free_dma_mem_arr:
1647 for (j = 0 ; j < i ; j++)
1648 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1649 HL_CPU_ACCESSIBLE_MEM_SIZE,
1650 virt_addr_arr[j],
1651 dma_addr_arr[j]);
1652
1653 return rc;
1654}
1655
1656static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1657{
1658 struct gaudi_device *gaudi = hdev->asic_specific;
1659 struct gaudi_internal_qman_info *q;
1660 u32 i;
1661
1662 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1663 q = &gaudi->internal_qmans[i];
1664 if (!q->pq_kernel_addr)
1665 continue;
1666 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1667 q->pq_kernel_addr,
1668 q->pq_dma_addr);
1669 }
1670}
1671
1672static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1673{
1674 struct gaudi_device *gaudi = hdev->asic_specific;
1675 struct gaudi_internal_qman_info *q;
1676 int rc, i;
1677
1678 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1679 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1680 continue;
1681
1682 q = &gaudi->internal_qmans[i];
1683
1684 switch (i) {
Ofir Bitton0940cab2020-08-31 08:52:56 +03001685 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001686 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1687 break;
1688 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1689 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1690 break;
1691 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1692 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1693 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02001694 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1695 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1696 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001697 default:
1698 dev_err(hdev->dev, "Bad internal queue index %d", i);
1699 rc = -EINVAL;
1700 goto free_internal_qmans_pq_mem;
1701 }
1702
1703 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1704 hdev, q->pq_size,
1705 &q->pq_dma_addr,
1706 GFP_KERNEL | __GFP_ZERO);
1707 if (!q->pq_kernel_addr) {
1708 rc = -ENOMEM;
1709 goto free_internal_qmans_pq_mem;
1710 }
1711 }
1712
1713 return 0;
1714
1715free_internal_qmans_pq_mem:
1716 gaudi_free_internal_qmans_pq_mem(hdev);
1717 return rc;
1718}
1719
Ohad Sharabic592c272021-04-21 13:03:21 +03001720static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1721{
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001722 struct asic_fixed_properties *prop = &hdev->asic_prop;
Ohad Sharabic592c272021-04-21 13:03:21 +03001723 struct pci_mem_region *region;
1724
1725 /* CFG */
1726 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1727 region->region_base = CFG_BASE;
1728 region->region_size = CFG_SIZE;
1729 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001730 region->bar_size = CFG_BAR_SIZE;
Ohad Sharabic592c272021-04-21 13:03:21 +03001731 region->bar_id = CFG_BAR_ID;
1732 region->used = 1;
1733
1734 /* SRAM */
1735 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1736 region->region_base = SRAM_BASE_ADDR;
1737 region->region_size = SRAM_SIZE;
1738 region->offset_in_bar = 0;
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001739 region->bar_size = SRAM_BAR_SIZE;
Ohad Sharabic592c272021-04-21 13:03:21 +03001740 region->bar_id = SRAM_BAR_ID;
1741 region->used = 1;
1742
1743 /* DRAM */
1744 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1745 region->region_base = DRAM_PHYS_BASE;
1746 region->region_size = hdev->asic_prop.dram_size;
1747 region->offset_in_bar = 0;
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001748 region->bar_size = prop->dram_pci_bar_size;
Ohad Sharabic592c272021-04-21 13:03:21 +03001749 region->bar_id = HBM_BAR_ID;
1750 region->used = 1;
1751
1752 /* SP SRAM */
1753 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1754 region->region_base = PSOC_SCRATCHPAD_ADDR;
1755 region->region_size = PSOC_SCRATCHPAD_SIZE;
1756 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001757 region->bar_size = CFG_BAR_SIZE;
Ohad Sharabic592c272021-04-21 13:03:21 +03001758 region->bar_id = CFG_BAR_ID;
1759 region->used = 1;
1760}
1761
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001762static int gaudi_sw_init(struct hl_device *hdev)
1763{
1764 struct gaudi_device *gaudi;
Ofir Bittonebd8d122020-05-10 13:41:28 +03001765 u32 i, event_id = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001766 int rc;
1767
1768 /* Allocate device structure */
1769 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1770 if (!gaudi)
1771 return -ENOMEM;
1772
Ofir Bittonebd8d122020-05-10 13:41:28 +03001773 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1774 if (gaudi_irq_map_table[i].valid) {
1775 if (event_id == GAUDI_EVENT_SIZE) {
1776 dev_err(hdev->dev,
1777 "Event array exceeds the limit of %u events\n",
1778 GAUDI_EVENT_SIZE);
1779 rc = -EINVAL;
1780 goto free_gaudi_device;
1781 }
1782
1783 gaudi->events[event_id++] =
1784 gaudi_irq_map_table[i].fc_id;
1785 }
1786 }
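	/*
	 * gaudi->events[] now maps consecutive event indices to the firmware
	 * fc_id values of the valid entries in gaudi_irq_map_table.
	 */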
1787
Oded Gabbay2f553422020-08-15 16:28:10 +03001788 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001789
1790 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1791
1792 hdev->asic_specific = gaudi;
1793
1794 /* Create DMA pool for small allocations */
1795 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1796 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1797 if (!hdev->dma_pool) {
1798 dev_err(hdev->dev, "failed to create DMA pool\n");
1799 rc = -ENOMEM;
1800 goto free_gaudi_device;
1801 }
1802
1803 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1804 if (rc)
1805 goto free_dma_pool;
1806
1807 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1808 if (!hdev->cpu_accessible_dma_pool) {
1809 dev_err(hdev->dev,
1810 "Failed to create CPU accessible DMA pool\n");
1811 rc = -ENOMEM;
1812 goto free_cpu_dma_mem;
1813 }
1814
1815 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1816 (uintptr_t) hdev->cpu_accessible_dma_mem,
1817 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1818 if (rc) {
1819 dev_err(hdev->dev,
1820 "Failed to add memory to CPU accessible DMA pool\n");
1821 rc = -EFAULT;
1822 goto free_cpu_accessible_dma_pool;
1823 }
1824
1825 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1826 if (rc)
1827 goto free_cpu_accessible_dma_pool;
1828
1829 spin_lock_init(&gaudi->hw_queues_lock);
1830 mutex_init(&gaudi->clk_gate_mutex);
1831
1832 hdev->supports_sync_stream = true;
1833 hdev->supports_coresight = true;
Ofir Bitton2795c882020-12-08 13:47:05 +02001834 hdev->supports_staged_submission = true;
Ohad Sharabi215f0c12021-06-14 22:18:41 +03001835 hdev->supports_wait_for_multi_cs = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001836
Ohad Sharabic592c272021-04-21 13:03:21 +03001837 gaudi_set_pci_memory_regions(hdev);
1838
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001839 return 0;
1840
1841free_cpu_accessible_dma_pool:
1842 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1843free_cpu_dma_mem:
Ohad Sharabi4cb45082021-05-20 09:09:03 +03001844 if (!hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03001845 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1846 hdev->cpu_pci_msb_addr);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001847 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1848 HL_CPU_ACCESSIBLE_MEM_SIZE,
1849 hdev->cpu_accessible_dma_mem,
1850 hdev->cpu_accessible_dma_address);
1851free_dma_pool:
1852 dma_pool_destroy(hdev->dma_pool);
1853free_gaudi_device:
1854 kfree(gaudi);
1855 return rc;
1856}
1857
1858static int gaudi_sw_fini(struct hl_device *hdev)
1859{
1860 struct gaudi_device *gaudi = hdev->asic_specific;
1861
1862 gaudi_free_internal_qmans_pq_mem(hdev);
1863
1864 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1865
Ohad Sharabi4cb45082021-05-20 09:09:03 +03001866 if (!hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03001867 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001868 hdev->cpu_pci_msb_addr);
Ofir Bittonc692dec2020-10-04 17:34:37 +03001869
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001870 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1871 HL_CPU_ACCESSIBLE_MEM_SIZE,
1872 hdev->cpu_accessible_dma_mem,
1873 hdev->cpu_accessible_dma_address);
1874
1875 dma_pool_destroy(hdev->dma_pool);
1876
1877 mutex_destroy(&gaudi->clk_gate_mutex);
1878
1879 kfree(gaudi);
1880
1881 return 0;
1882}
1883
1884static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1885{
1886 struct hl_device *hdev = arg;
1887 int i;
1888
1889 if (hdev->disabled)
1890 return IRQ_HANDLED;
1891
1892 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1893 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1894
1895 hl_irq_handler_eq(irq, &hdev->event_queue);
1896
1897 return IRQ_HANDLED;
1898}
1899
1900/*
1901 * For backward compatibility, new MSI interrupts should be set after the
1902 * existing CPU and NIC interrupts.
1903 */
1904static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1905 bool cpu_eq)
1906{
1907 int msi_vec;
1908
1909 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1910 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1911 GAUDI_EVENT_QUEUE_MSI_IDX);
1912
1913 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1914 (nr + NIC_NUMBER_OF_ENGINES + 1);
1915
1916 return pci_irq_vector(hdev->pdev, msi_vec);
1917}
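
/*
 * Illustrative mapping of the function above: CQ interrupts with
 * nr < GAUDI_EVENT_QUEUE_MSI_IDX and the CPU EQ keep their index as the MSI
 * vector, while any newer MSI with a higher nr is placed after the NIC
 * vectors, i.e. it maps to vector nr + NIC_NUMBER_OF_ENGINES + 1.
 */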
1918
1919static int gaudi_enable_msi_single(struct hl_device *hdev)
1920{
1921 int rc, irq;
1922
Oded Gabbay3b82c342020-11-27 18:10:20 +02001923 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001924
1925 irq = gaudi_pci_irq_vector(hdev, 0, false);
1926 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1927 "gaudi single msi", hdev);
1928 if (rc)
1929 dev_err(hdev->dev,
1930 "Failed to request single MSI IRQ\n");
1931
1932 return rc;
1933}
1934
1935static int gaudi_enable_msi_multi(struct hl_device *hdev)
1936{
1937 int cq_cnt = hdev->asic_prop.completion_queues_count;
1938 int rc, i, irq_cnt_init, irq;
1939
1940 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1941 irq = gaudi_pci_irq_vector(hdev, i, false);
1942 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1943 &hdev->completion_queue[i]);
1944 if (rc) {
1945 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1946 goto free_irqs;
1947 }
1948 }
1949
1950 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1951 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1952 &hdev->event_queue);
1953 if (rc) {
1954 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1955 goto free_irqs;
1956 }
1957
1958 return 0;
1959
1960free_irqs:
1961 for (i = 0 ; i < irq_cnt_init ; i++)
1962 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1963 &hdev->completion_queue[i]);
1964 return rc;
1965}
1966
1967static int gaudi_enable_msi(struct hl_device *hdev)
1968{
1969 struct gaudi_device *gaudi = hdev->asic_specific;
1970 int rc;
1971
1972 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1973 return 0;
1974
Oded Gabbay12e66a12021-03-08 18:06:57 +02001975 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001976 if (rc < 0) {
1977 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1978 return rc;
1979 }
1980
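	/*
	 * Note: pci_alloc_irq_vectors() above requests at most one vector, so
	 * rc is expected to be 1 here and the single-MSI path below is taken;
	 * the multi-MSI path is kept for configurations that allocate
	 * NUMBER_OF_INTERRUPTS vectors.
	 */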
1981 if (rc < NUMBER_OF_INTERRUPTS) {
1982 gaudi->multi_msi_mode = false;
1983 rc = gaudi_enable_msi_single(hdev);
1984 } else {
1985 gaudi->multi_msi_mode = true;
1986 rc = gaudi_enable_msi_multi(hdev);
1987 }
1988
1989 if (rc)
1990 goto free_pci_irq_vectors;
1991
1992 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1993
1994 return 0;
1995
1996free_pci_irq_vectors:
1997 pci_free_irq_vectors(hdev->pdev);
1998 return rc;
1999}
2000
2001static void gaudi_sync_irqs(struct hl_device *hdev)
2002{
2003 struct gaudi_device *gaudi = hdev->asic_specific;
2004 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2005
2006 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2007 return;
2008
2009	/* Wait for all in-flight IRQ handlers to finish */
2010 if (gaudi->multi_msi_mode) {
2011 for (i = 0 ; i < cq_cnt ; i++)
2012 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2013
2014 synchronize_irq(gaudi_pci_irq_vector(hdev,
2015 GAUDI_EVENT_QUEUE_MSI_IDX,
2016 true));
2017 } else {
2018 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2019 }
2020}
2021
2022static void gaudi_disable_msi(struct hl_device *hdev)
2023{
2024 struct gaudi_device *gaudi = hdev->asic_specific;
2025 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2026
2027 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2028 return;
2029
2030 gaudi_sync_irqs(hdev);
2031
2032 if (gaudi->multi_msi_mode) {
2033 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2034 true);
2035 free_irq(irq, &hdev->event_queue);
2036
2037 for (i = 0 ; i < cq_cnt ; i++) {
2038 irq = gaudi_pci_irq_vector(hdev, i, false);
2039 free_irq(irq, &hdev->completion_queue[i]);
2040 }
2041 } else {
2042 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2043 }
2044
2045 pci_free_irq_vectors(hdev->pdev);
2046
2047 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2048}
2049
2050static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2051{
2052 struct gaudi_device *gaudi = hdev->asic_specific;
2053
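	/*
	 * The checks below skip the configuration when the firmware owns the
	 * security settings, when the boot firmware already enabled SRAM
	 * scrambling, when the scrambler was already configured, or when SRAM
	 * scrambling is disabled for this device.
	 */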
Ohad Sharabi4cb45082021-05-20 09:09:03 +03002054 if (hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002055 return;
2056
Ohad Sharabi6a785e32021-05-29 23:26:10 +03002057 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2058 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002059 return;
2060
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002061 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2062 return;
2063
2064 if (!hdev->sram_scrambler_enable)
2065 return;
2066
2067 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2068 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2069 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2070 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2071 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2072 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2073 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2074 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2075 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2076 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2077 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2078 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2079 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2080 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2081 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2082 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2083
2084 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2085 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2086 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2087 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2088 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2089 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2090 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2091 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2092 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2093 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2094 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2095 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2096 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2097 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2098 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2099 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2100
2101 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2102 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2103 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2104 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2105 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2106 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2107 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2108 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2109 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2110 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2111 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2112 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2113 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2114 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2115 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2116 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2117
2118 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2119}
2120
2121static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2122{
2123 struct gaudi_device *gaudi = hdev->asic_specific;
2124
Ohad Sharabi4cb45082021-05-20 09:09:03 +03002125 if (hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002126 return;
2127
Ohad Sharabi6a785e32021-05-29 23:26:10 +03002128 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2129 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002130 return;
2131
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002132 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2133 return;
2134
2135 if (!hdev->dram_scrambler_enable)
2136 return;
2137
2138 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2139 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2140 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2141 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2142 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2143 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2144 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2145 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2146 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2147 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2148 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2149 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2150 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2151 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2152 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2153 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2154
2155 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2156 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2157 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2158 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2159 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2160 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2161 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2162 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2163 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2164 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2165 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2166 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2167 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2168 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2169 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2170 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2171
2172 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2173 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2174 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2175 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2176 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2177 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2178 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2179 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2180 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2181 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2182 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2183 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2184 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2185 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2186 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2187 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2188
2189 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2190}
2191
2192static void gaudi_init_e2e(struct hl_device *hdev)
2193{
Ohad Sharabi4cb45082021-05-20 09:09:03 +03002194 if (hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002195 return;
2196
Ohad Sharabi6a785e32021-05-29 23:26:10 +03002197 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2198 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002199 return;
2200
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002201 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2202 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2203 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2204 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2205
2206 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2207 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2208 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2209 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2210
2211 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2212 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2213 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2214 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2215
2216 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2217 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2218 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2219 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2220
2221 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2222 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2223 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2224 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2225
2226 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2227 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2228 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2229 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2230
2231 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2232 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2233 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2234 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2235
2236 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2237 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2238 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2239 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2240
2241 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2242 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2243 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2244 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2245
2246 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2247 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2248 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2249 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2250
2251 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2252 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2253 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2254 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2255
2256 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2257 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2258 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2259 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2260
2261 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2262 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2263 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2264 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2265
2266 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2267 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2268 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2269 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2270
2271 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2272 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2273 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2274 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2275
2276 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2277 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2278 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2279 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2280
2281 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2282 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2283 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2284 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2285
2286 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2287 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2288 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2289 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2290
2291 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2292 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2293 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2294 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2295
2296 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2297 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2298 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2299 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2300
2301 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2302 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2303 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2304 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2305
2306 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2307 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2308 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2309 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2310
2311 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2312 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2313 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2314 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2315
2316 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2317 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2318 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2319 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2320
2321 if (!hdev->dram_scrambler_enable) {
2322 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2323 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2324 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2325 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2326
2327 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2328 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2329 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2330 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2331
2332 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2333 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2334 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2335 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2336
2337 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2338 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2339 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2340 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2341
2342 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2343 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2344 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2345 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2346
2347 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2348 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2349 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2350 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2351
2352 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2353 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2354 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2355 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2356
2357 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2358 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2359 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2360 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2361
2362 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2363 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2364 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2365 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2366
2367 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2368 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2369 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2370 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2371
2372 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2373 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2374 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2375 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2376
2377 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2378 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2379 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2380 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2381
2382 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2383 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2384 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2385 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2386
2387 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2388 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2389 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2390 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2391
2392 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2393 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2394 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2395 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2396
2397 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2398 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2399 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2400 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2401
2402 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2403 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2404 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2405 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2406
2407 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2408 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2409 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2410 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2411
2412 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2413 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2414 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2415 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2416
2417 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2418 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2419 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2420 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2421
2422 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2423 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2424 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2425 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2426
2427 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2428 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2429 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2430 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2431
2432 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2433 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2434 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2435 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2436
2437 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2438 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2439 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2440 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2441 }
2442
2443 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2444 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2445 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2446 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2447
2448 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2449 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2450 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2451 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2452
2453 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2454 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2455 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2456 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2457
2458 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2459 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2460 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2461 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2462
2463 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2464 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2465 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2466 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2467
2468 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2469 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2470 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2471 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2472
2473 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2474 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2475 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2476 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2477
2478 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2479 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2480 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2481 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2482
2483 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2484 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2485 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2486 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2487
2488 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2489 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2490 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2491 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2492
2493 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2494 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2495 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2496 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2497
2498 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2499 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2500 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2501 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2502
2503 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2504 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2505 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2506 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2507
2508 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2509 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2510 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2511 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2512
2513 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2514 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2515 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2516 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2517
2518 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2519 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2520 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2521 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2522
2523 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2524 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2525 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2526 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2527
2528 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2529 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2530 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2531 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2532
2533 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2534 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2535 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2536 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2537
2538 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2539 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2540 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2541 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2542
2543 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2544 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2545 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2546 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2547
2548 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2549 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2550 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2551 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2552
2553 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2554 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2555 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2556 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2557
2558 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2559 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2560 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2561 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2562}
2563
2564static void gaudi_init_hbm_cred(struct hl_device *hdev)
2565{
2566 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2567
Ohad Sharabi4cb45082021-05-20 09:09:03 +03002568 if (hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002569 return;
2570
Ohad Sharabi6a785e32021-05-29 23:26:10 +03002571 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2572 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002573 return;
2574
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002575 hbm0_wr = 0x33333333;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002576 hbm0_rd = 0x77777777;
Oded Gabbay0b168c82020-06-15 19:25:57 +03002577 hbm1_wr = 0x55555555;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002578 hbm1_rd = 0xDDDDDDDD;
2579
2580 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2581 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2582 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2583 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2584
2585 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2586 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2587 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2588 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2589
2590 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2591 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2592 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2593 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2594
2595 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2596 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2597 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2598 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2599
2600 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2601 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2602 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2603 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2604 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2605 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2606 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2607 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2608 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2609 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2610 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2611 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2612
2613 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2614 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2615 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2616 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2617 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2618 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2619 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2620 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2621 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2622 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2623 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2624 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2625}
2626
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002627static void gaudi_init_golden_registers(struct hl_device *hdev)
2628{
2629 u32 tpc_offset;
2630 int tpc_id, i;
2631
2632 gaudi_init_e2e(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002633 gaudi_init_hbm_cred(hdev);
2634
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002635 for (tpc_id = 0, tpc_offset = 0;
2636 tpc_id < TPC_NUMBER_OF_ENGINES;
2637 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2638 /* Mask all arithmetic interrupts from TPC */
2639 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2640 /* Set 16 cache lines */
2641 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2642 ICACHE_FETCH_LINE_NUM, 2);
2643 }
2644
2645 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2646 for (i = 0 ; i < 128 ; i += 8)
2647 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2648
2649 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2650 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2651 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2652 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002653}
2654
2655static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2656 int qman_id, dma_addr_t qman_pq_addr)
2657{
Koby Elbaze591a492021-05-12 18:05:46 +03002658 struct cpu_dyn_regs *dyn_regs =
2659 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002660 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2661 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2662 u32 q_off, dma_qm_offset;
Koby Elbaz81217362021-05-03 23:03:15 +03002663 u32 dma_qm_err_cfg, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002664
2665 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2666
2667 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2668 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2669 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2670 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2671 so_base_en_lo = lower_32_bits(CFG_BASE +
2672 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2673 so_base_en_hi = upper_32_bits(CFG_BASE +
2674 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2675 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2676 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2677 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2678 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2679 so_base_ws_lo = lower_32_bits(CFG_BASE +
2680 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2681 so_base_ws_hi = upper_32_bits(CFG_BASE +
2682 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2683
2684 q_off = dma_qm_offset + qman_id * 4;
2685
2686 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2687 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2688
2689 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2690 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2691 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2692
Ofir Bitton25121d92020-09-24 08:22:58 +03002693 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2694 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2695 QMAN_LDMA_SRC_OFFSET);
2696 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2697 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002698
2699 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2700 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2701 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2702 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2703 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2704 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2705 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2706 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2707
Omer Shpigelmance043262020-06-16 17:56:27 +03002708 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2709
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002710 /* The following configuration is needed only once per QMAN */
2711 if (qman_id == 0) {
Koby Elbaz81217362021-05-03 23:03:15 +03002712 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2713 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Koby Elbaze591a492021-05-12 18:05:46 +03002714 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03002715
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002716 /* Configure RAZWI IRQ */
2717 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03002718 if (hdev->stop_on_err)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002719 dma_qm_err_cfg |=
2720 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002721
2722 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03002723
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002724 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002725 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002726 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002727 upper_32_bits(CFG_BASE + irq_handler_offset));
2728
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002729 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2730 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2731 dma_id);
2732
2733 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2734 QM_ARB_ERR_MSG_EN_MASK);
2735
2736 /* Increase ARB WDT to support streams architecture */
2737 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2738 GAUDI_ARB_WDT_TIMEOUT);
2739
2740 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2741 QMAN_EXTERNAL_MAKE_TRUSTED);
2742
2743 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2744 }
2745}
2746
2747static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2748{
Koby Elbaze591a492021-05-12 18:05:46 +03002749 struct cpu_dyn_regs *dyn_regs =
2750 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002751 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
Koby Elbaz81217362021-05-03 23:03:15 +03002752 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2753 u32 irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002754
2755 /* Set to maximum possible according to physical size */
2756 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2757 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2758
Oded Gabbayd1f36332020-09-14 09:26:54 +03002759 /* WA for H/W bug H3-2116 */
2760 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2761
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002762	/* STOP_ON bit implies the operation gets no completion in case of RAZWI */
2763 if (hdev->stop_on_err)
2764 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2765
2766 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03002767
2768 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2769 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Koby Elbaze591a492021-05-12 18:05:46 +03002770 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03002771
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002772 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002773 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002774 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002775 upper_32_bits(CFG_BASE + irq_handler_offset));
2776
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002777 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2778 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2779 WREG32(mmDMA0_CORE_PROT + dma_offset,
2780 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2781 /* If the channel is secured, it should be in MMU bypass mode */
2782 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2783 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2784 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2785}
2786
2787static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2788 u32 enable_mask)
2789{
2790 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2791
2792 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2793}
2794
2795static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2796{
2797 struct gaudi_device *gaudi = hdev->asic_specific;
2798 struct hl_hw_queue *q;
2799 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2800
2801 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2802 return;
2803
2804 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2805 dma_id = gaudi_dma_assignment[i];
2806 /*
2807		 * For queues after the CPU Q, we need to add 1 to get the correct
2808		 * queue index. In addition, we need to account for the CPU EQ and
2809		 * NIC IRQs in order to get the correct MSI register.
2810 */
2811 if (dma_id > 1) {
2812 cpu_skip = 1;
2813 nic_skip = NIC_NUMBER_OF_ENGINES;
2814 } else {
2815 cpu_skip = 0;
2816 nic_skip = 0;
2817 }
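		/*
		 * Illustrative mapping: for dma_id 0/1 stream j maps directly
		 * to kernel queue 4 * dma_id + j, while for the later PCI DMA
		 * channels one slot is skipped for the CPU queue and the MSI
		 * vector additionally skips the NIC IRQs.
		 */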
2818
2819 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2820 q_idx = 4 * dma_id + j + cpu_skip;
2821 q = &hdev->kernel_queues[q_idx];
2822 q->cq_id = cq_id++;
2823 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2824 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2825 q->bus_address);
2826 }
2827
2828 gaudi_init_dma_core(hdev, dma_id);
2829
2830 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2831 }
2832
2833 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2834}
2835
2836static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2837 int qman_id, u64 qman_base_addr)
2838{
Koby Elbaze591a492021-05-12 18:05:46 +03002839 struct cpu_dyn_regs *dyn_regs =
2840 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Ofir Bitton5de406c2020-09-10 10:56:26 +03002841 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2842 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Koby Elbaz81217362021-05-03 23:03:15 +03002843 u32 dma_qm_err_cfg, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002844 u32 q_off, dma_qm_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002845
2846 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2847
Ofir Bitton5de406c2020-09-10 10:56:26 +03002848 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2849 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2850 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002851 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002852 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002853 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002854 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002855 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002856 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2857 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2858 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2859 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2860 so_base_ws_lo = lower_32_bits(CFG_BASE +
2861 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2862 so_base_ws_hi = upper_32_bits(CFG_BASE +
2863 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002864
2865 q_off = dma_qm_offset + qman_id * 4;
2866
2867 if (qman_id < 4) {
2868 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2869 lower_32_bits(qman_base_addr));
2870 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2871 upper_32_bits(qman_base_addr));
2872
2873 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2874 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2875 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2876
Ofir Bitton25121d92020-09-24 08:22:58 +03002877 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2878 QMAN_CPDMA_SIZE_OFFSET);
2879 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2880 QMAN_CPDMA_SRC_OFFSET);
2881 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2882 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002883 } else {
Koby Elbaz81217362021-05-03 23:03:15 +03002884 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03002885 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2886 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03002887
Ofir Bitton25121d92020-09-24 08:22:58 +03002888 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2889 QMAN_LDMA_SIZE_OFFSET);
2890 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2891 QMAN_LDMA_SRC_OFFSET);
2892 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
Oded Gabbay5b94d6e2020-09-25 20:14:15 +03002893 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002894
2895 /* Configure RAZWI IRQ */
2896 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03002897 if (hdev->stop_on_err)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002898 dma_qm_err_cfg |=
2899 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03002900
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002901 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2902
2903 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002904 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002905 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002906 upper_32_bits(CFG_BASE + irq_handler_offset));
2907
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002908 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2909 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2910 dma_id);
2911
2912 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2913 QM_ARB_ERR_MSG_EN_MASK);
2914
2915 /* Increase ARB WDT to support streams architecture */
2916 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2917 GAUDI_ARB_WDT_TIMEOUT);
2918
2919 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2920 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2921 QMAN_INTERNAL_MAKE_TRUSTED);
2922 }
2923
Ofir Bitton5de406c2020-09-10 10:56:26 +03002924 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2925 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2926 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2927 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2928
2929 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2930 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2931 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2932 mtr_base_ws_lo);
2933 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2934 mtr_base_ws_hi);
2935 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2936 so_base_ws_lo);
2937 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2938 so_base_ws_hi);
2939 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002940}
2941
2942static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2943{
2944 struct gaudi_device *gaudi = hdev->asic_specific;
2945 struct gaudi_internal_qman_info *q;
2946 u64 qman_base_addr;
2947 int i, j, dma_id, internal_q_index;
2948
2949 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2950 return;
2951
2952 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2953 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2954
2955 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2956 /*
2957 * Add the CPU queue in order to get the correct queue
2958 * number as all internal queue are placed after it
2959 */
2960 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2961
2962 q = &gaudi->internal_qmans[internal_q_index];
2963 qman_base_addr = (u64) q->pq_dma_addr;
2964 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2965 qman_base_addr);
2966 }
2967
2968 /* Initializing lower CP for HBM DMA QMAN */
2969 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2970
2971 gaudi_init_dma_core(hdev, dma_id);
2972
2973 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2974 }
2975
2976 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2977}
2978
2979static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2980 int qman_id, u64 qman_base_addr)
2981{
Koby Elbaze591a492021-05-12 18:05:46 +03002982 struct cpu_dyn_regs *dyn_regs =
2983 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002984 u32 mtr_base_lo, mtr_base_hi;
2985 u32 so_base_lo, so_base_hi;
Koby Elbaz81217362021-05-03 23:03:15 +03002986 u32 irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002987 u32 q_off, mme_id;
2988 u32 mme_qm_err_cfg;
2989
2990 mtr_base_lo = lower_32_bits(CFG_BASE +
2991 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2992 mtr_base_hi = upper_32_bits(CFG_BASE +
2993 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2994 so_base_lo = lower_32_bits(CFG_BASE +
2995 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2996 so_base_hi = upper_32_bits(CFG_BASE +
2997 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2998
2999 q_off = mme_offset + qman_id * 4;
3000
3001 if (qman_id < 4) {
3002 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3003 lower_32_bits(qman_base_addr));
3004 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3005 upper_32_bits(qman_base_addr));
3006
3007 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3008 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3009 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3010
Ofir Bitton25121d92020-09-24 08:22:58 +03003011 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3012 QMAN_CPDMA_SIZE_OFFSET);
3013 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3014 QMAN_CPDMA_SRC_OFFSET);
3015 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3016 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003017 } else {
Koby Elbaz81217362021-05-03 23:03:15 +03003018 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03003019 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3020 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03003021
Ofir Bitton25121d92020-09-24 08:22:58 +03003022 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3023 QMAN_LDMA_SIZE_OFFSET);
3024 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3025 QMAN_LDMA_SRC_OFFSET);
3026 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3027 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003028
3029 /* Configure RAZWI IRQ */
3030 mme_id = mme_offset /
Koby Elbazb92c6372021-05-19 15:16:52 +03003031 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003032
3033 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03003034 if (hdev->stop_on_err)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003035 mme_qm_err_cfg |=
3036 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03003037
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003038 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03003039
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003040 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003041 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003042 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003043 upper_32_bits(CFG_BASE + irq_handler_offset));
3044
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003045 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3046 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3047 mme_id);
3048
3049 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3050 QM_ARB_ERR_MSG_EN_MASK);
3051
3052 /* Increase ARB WDT to support streams architecture */
3053 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3054 GAUDI_ARB_WDT_TIMEOUT);
3055
3056 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3057 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3058 QMAN_INTERNAL_MAKE_TRUSTED);
3059 }
3060
3061 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3062 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3063 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3064 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3065}
3066
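/*
 * Initialize the MME QMANs (MME0 and MME2), including their lower CPs,
 * and enable them
 */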
3067static void gaudi_init_mme_qmans(struct hl_device *hdev)
3068{
3069 struct gaudi_device *gaudi = hdev->asic_specific;
3070 struct gaudi_internal_qman_info *q;
3071 u64 qman_base_addr;
3072 u32 mme_offset;
3073 int i, internal_q_index;
3074
3075 if (gaudi->hw_cap_initialized & HW_CAP_MME)
3076 return;
3077
3078 /*
3079 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3080 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3081 */
3082
3083 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3084
3085 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3086 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3087 q = &gaudi->internal_qmans[internal_q_index];
3088 qman_base_addr = (u64) q->pq_dma_addr;
3089 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3090 qman_base_addr);
3091 if (i == 3)
3092 mme_offset = 0;
3093 }
3094
3095 /* Initializing lower CP for MME QMANs */
3096 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3097 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3098 gaudi_init_mme_qman(hdev, 0, 4, 0);
3099
3100 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3101 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3102
3103 gaudi->hw_cap_initialized |= HW_CAP_MME;
3104}
3105
3106static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3107 int qman_id, u64 qman_base_addr)
3108{
Koby Elbaze591a492021-05-12 18:05:46 +03003109 struct cpu_dyn_regs *dyn_regs =
3110 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Ofir Bitton5de406c2020-09-10 10:56:26 +03003111 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3112 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Koby Elbaz81217362021-05-03 23:03:15 +03003113 u32 tpc_qm_err_cfg, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003114 u32 q_off, tpc_id;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003115
Ofir Bitton5de406c2020-09-10 10:56:26 +03003116 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3117 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3118 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003119 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003120 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003121 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003122 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003123 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003124 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3125 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3126 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3127 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3128 so_base_ws_lo = lower_32_bits(CFG_BASE +
3129 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3130 so_base_ws_hi = upper_32_bits(CFG_BASE +
3131 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003132
3133 q_off = tpc_offset + qman_id * 4;
3134
Ofir Bitton5de406c2020-09-10 10:56:26 +03003135 tpc_id = tpc_offset /
3136 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3137
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003138 if (qman_id < 4) {
3139 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3140 lower_32_bits(qman_base_addr));
3141 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3142 upper_32_bits(qman_base_addr));
3143
3144 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3145 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3146 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3147
Ofir Bitton25121d92020-09-24 08:22:58 +03003148 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3149 QMAN_CPDMA_SIZE_OFFSET);
3150 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3151 QMAN_CPDMA_SRC_OFFSET);
3152 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3153 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003154 } else {
Koby Elbaz81217362021-05-03 23:03:15 +03003155 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03003156 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3157 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03003158
Ofir Bitton25121d92020-09-24 08:22:58 +03003159 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3160 QMAN_LDMA_SIZE_OFFSET);
3161 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3162 QMAN_LDMA_SRC_OFFSET);
3163 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3164 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003165
3166 /* Configure RAZWI IRQ */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003167 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03003168 if (hdev->stop_on_err)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003169 tpc_qm_err_cfg |=
3170 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003171
3172 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03003173
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003174 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003175 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003176 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003177 upper_32_bits(CFG_BASE + irq_handler_offset));
3178
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003179 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3180 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3181 tpc_id);
3182
3183 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3184 QM_ARB_ERR_MSG_EN_MASK);
3185
3186 /* Increase ARB WDT to support streams architecture */
3187 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3188 GAUDI_ARB_WDT_TIMEOUT);
3189
3190 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3191 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3192 QMAN_INTERNAL_MAKE_TRUSTED);
3193 }
3194
Ofir Bitton5de406c2020-09-10 10:56:26 +03003195 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3196 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3197 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3198 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3199
3200 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3201 if (tpc_id == 6) {
3202 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3203 mtr_base_ws_lo);
3204 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3205 mtr_base_ws_hi);
3206 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3207 so_base_ws_lo);
3208 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3209 so_base_ws_hi);
3210 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003211}
3212
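/*
 * Initialize all TPC QMAN streams and lower CPs, set the sync manager base
 * address per TPC and enable each TPC QMAN
 */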
3213static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3214{
3215 struct gaudi_device *gaudi = hdev->asic_specific;
3216 struct gaudi_internal_qman_info *q;
3217 u64 qman_base_addr;
3218 u32 so_base_hi, tpc_offset = 0;
3219 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3220 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3221 int i, tpc_id, internal_q_index;
3222
3223 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3224 return;
3225
3226 so_base_hi = upper_32_bits(CFG_BASE +
3227 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3228
3229 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3230 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3231 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3232 tpc_id * QMAN_STREAMS + i;
3233 q = &gaudi->internal_qmans[internal_q_index];
3234 qman_base_addr = (u64) q->pq_dma_addr;
3235 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3236 qman_base_addr);
3237
3238 if (i == 3) {
3239 /* Initializing lower CP for TPC QMAN */
3240 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3241
3242 /* Enable the QMAN and TPC channel */
3243 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3244 QMAN_TPC_ENABLE);
3245 }
3246 }
3247
3248 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3249 so_base_hi);
3250
3251 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3252
Oded Gabbay65887292020-08-12 11:21:01 +03003253 gaudi->hw_cap_initialized |=
3254 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003255 }
3256}
3257
Oded Gabbay3c681572020-11-02 21:10:39 +02003258static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3259 int qman_id, u64 qman_base_addr, int nic_id)
3260{
Koby Elbaze591a492021-05-12 18:05:46 +03003261 struct cpu_dyn_regs *dyn_regs =
3262 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Ofir Bitton5de406c2020-09-10 10:56:26 +03003263 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3264 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Koby Elbaz81217362021-05-03 23:03:15 +03003265 u32 nic_qm_err_cfg, irq_handler_offset;
Oded Gabbay3c681572020-11-02 21:10:39 +02003266 u32 q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02003267
Ofir Bitton5de406c2020-09-10 10:56:26 +03003268 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3269 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3270 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003271 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003272 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003273 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003274 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003275 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003276 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3277 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3278 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3279 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3280 so_base_ws_lo = lower_32_bits(CFG_BASE +
3281 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3282 so_base_ws_hi = upper_32_bits(CFG_BASE +
3283 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbay3c681572020-11-02 21:10:39 +02003284
3285 q_off = nic_offset + qman_id * 4;
3286
3287 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3288 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3289
3290 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3291 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3292 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3293
Ofir Bitton5de406c2020-09-10 10:56:26 +03003294 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3295 QMAN_LDMA_SIZE_OFFSET);
3296 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3297 QMAN_LDMA_SRC_OFFSET);
3298 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3299 QMAN_LDMA_DST_OFFSET);
Oded Gabbay3c681572020-11-02 21:10:39 +02003300
Ofir Bitton5de406c2020-09-10 10:56:26 +03003301 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3302 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3303 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3304 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3305
3306 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3307 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3308 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3309 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3310 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
Oded Gabbay3c681572020-11-02 21:10:39 +02003311
3312 if (qman_id == 0) {
Koby Elbaz81217362021-05-03 23:03:15 +03003313 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03003314 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3315 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03003316
Oded Gabbay3c681572020-11-02 21:10:39 +02003317 /* Configure RAZWI IRQ */
3318 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03003319 if (hdev->stop_on_err)
Oded Gabbay3c681572020-11-02 21:10:39 +02003320 nic_qm_err_cfg |=
3321 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Oded Gabbay3c681572020-11-02 21:10:39 +02003322
3323 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03003324
Oded Gabbay3c681572020-11-02 21:10:39 +02003325 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003326 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbay3c681572020-11-02 21:10:39 +02003327 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003328 upper_32_bits(CFG_BASE + irq_handler_offset));
3329
Oded Gabbay3c681572020-11-02 21:10:39 +02003330 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3331 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3332 nic_id);
3333
3334 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3335 QM_ARB_ERR_MSG_EN_MASK);
3336
3337 /* Increase ARB WDT to support streams architecture */
3338 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3339 GAUDI_ARB_WDT_TIMEOUT);
3340
3341 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3342 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3343 QMAN_INTERNAL_MAKE_TRUSTED);
3344 }
3345}
3346
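/*
 * Initialize the QMANs of all NIC ports that are enabled in
 * hdev->nic_ports_mask and enable them
 */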
3347static void gaudi_init_nic_qmans(struct hl_device *hdev)
3348{
3349 struct gaudi_device *gaudi = hdev->asic_specific;
3350 struct gaudi_internal_qman_info *q;
3351 u64 qman_base_addr;
3352 u32 nic_offset = 0;
3353 u32 nic_delta_between_qmans =
3354 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3355 u32 nic_delta_between_nics =
3356 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3357 int i, nic_id, internal_q_index;
3358
3359 if (!hdev->nic_ports_mask)
3360 return;
3361
3362 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3363 return;
3364
3365 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3366
3367 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3368 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3369 nic_offset += nic_delta_between_qmans;
3370 if (nic_id & 1) {
3371 nic_offset -= (nic_delta_between_qmans * 2);
3372 nic_offset += nic_delta_between_nics;
3373 }
3374 continue;
3375 }
3376
3377 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3378 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3379 nic_id * QMAN_STREAMS + i;
3380 q = &gaudi->internal_qmans[internal_q_index];
3381 qman_base_addr = (u64) q->pq_dma_addr;
3382 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3383 qman_base_addr, nic_id);
3384 }
3385
3386 /* Enable the QMAN */
3387 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3388
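		/*
		 * Each NIC macro holds two QMANs, so after an odd port rewind
		 * the per-QMAN delta and advance to the next NIC macro
		 */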
3389 nic_offset += nic_delta_between_qmans;
3390 if (nic_id & 1) {
3391 nic_offset -= (nic_delta_between_qmans * 2);
3392 nic_offset += nic_delta_between_nics;
3393 }
3394
3395 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3396 }
3397}
3398
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003399static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3400{
3401 struct gaudi_device *gaudi = hdev->asic_specific;
3402
3403 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3404 return;
3405
3406 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3407 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3408 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3409}
3410
3411static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3412{
3413 struct gaudi_device *gaudi = hdev->asic_specific;
3414
3415 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3416 return;
3417
3418 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3419 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3420 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3421 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3422 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3423}
3424
3425static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3426{
3427 struct gaudi_device *gaudi = hdev->asic_specific;
3428
3429 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3430 return;
3431
3432 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3433 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3434}
3435
3436static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3437{
3438 struct gaudi_device *gaudi = hdev->asic_specific;
3439 u32 tpc_offset = 0;
3440 int tpc_id;
3441
3442 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3443 return;
3444
3445 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3446 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3447 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3448 }
3449}
3450
Oded Gabbay3c681572020-11-02 21:10:39 +02003451static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3452{
3453 struct gaudi_device *gaudi = hdev->asic_specific;
3454 u32 nic_mask, nic_offset = 0;
3455 u32 nic_delta_between_qmans =
3456 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3457 u32 nic_delta_between_nics =
3458 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3459 int nic_id;
3460
3461 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3462 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3463
3464 if (gaudi->hw_cap_initialized & nic_mask)
3465 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3466
3467 nic_offset += nic_delta_between_qmans;
3468 if (nic_id & 1) {
3469 nic_offset -= (nic_delta_between_qmans * 2);
3470 nic_offset += nic_delta_between_nics;
3471 }
3472 }
3473}
3474
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003475static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3476{
3477 struct gaudi_device *gaudi = hdev->asic_specific;
3478
3479 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3480 return;
3481
3482 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3483 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3484 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3485 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3486}
3487
3488static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3489{
3490 struct gaudi_device *gaudi = hdev->asic_specific;
3491
3492 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3493 return;
3494
3495 /* Stop CPs of HBM DMA QMANs */
3496
3497 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3498 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3499 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3500 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3501 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3502}
3503
3504static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3505{
3506 struct gaudi_device *gaudi = hdev->asic_specific;
3507
3508 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3509 return;
3510
3511 /* Stop CPs of MME QMANs */
3512 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3513 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3514}
3515
3516static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3517{
3518 struct gaudi_device *gaudi = hdev->asic_specific;
3519
3520 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3521 return;
3522
3523 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3524 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3525 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3526 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3527 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3528 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3529 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3530 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3531}
3532
Oded Gabbay3c681572020-11-02 21:10:39 +02003533static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3534{
3535 struct gaudi_device *gaudi = hdev->asic_specific;
3536
3537 /* Stop upper CPs of QMANs */
3538
3539 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3540 WREG32(mmNIC0_QM0_GLBL_CFG1,
3541 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3542 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3543 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3544
3545 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3546 WREG32(mmNIC0_QM1_GLBL_CFG1,
3547 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3548 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3549 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3550
3551 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3552 WREG32(mmNIC1_QM0_GLBL_CFG1,
3553 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3554 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3555 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3556
3557 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3558 WREG32(mmNIC1_QM1_GLBL_CFG1,
3559 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3560 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3561 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3562
3563 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3564 WREG32(mmNIC2_QM0_GLBL_CFG1,
3565 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3566 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3567 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3568
3569 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3570 WREG32(mmNIC2_QM1_GLBL_CFG1,
3571 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3572 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3573 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3574
3575 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3576 WREG32(mmNIC3_QM0_GLBL_CFG1,
3577 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3578 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3579 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3580
3581 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3582 WREG32(mmNIC3_QM1_GLBL_CFG1,
3583 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3584 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3585 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3586
3587 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3588 WREG32(mmNIC4_QM0_GLBL_CFG1,
3589 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3590 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3591 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3592
3593 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3594 WREG32(mmNIC4_QM1_GLBL_CFG1,
3595 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3596 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3597 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3598}
3599
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003600static void gaudi_pci_dma_stall(struct hl_device *hdev)
3601{
3602 struct gaudi_device *gaudi = hdev->asic_specific;
3603
3604 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3605 return;
3606
3607 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3608 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3609 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3610}
3611
3612static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3613{
3614 struct gaudi_device *gaudi = hdev->asic_specific;
3615
3616 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3617 return;
3618
3619 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3620 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3621 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3622 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3623 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3624}
3625
3626static void gaudi_mme_stall(struct hl_device *hdev)
3627{
3628 struct gaudi_device *gaudi = hdev->asic_specific;
3629
3630 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3631 return;
3632
3633 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3634 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3635 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3636 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3637 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3638 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3639 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3640 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3641 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3642 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3643 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3644 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3645 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3646 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3647 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3648 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3649 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3650}
3651
3652static void gaudi_tpc_stall(struct hl_device *hdev)
3653{
3654 struct gaudi_device *gaudi = hdev->asic_specific;
3655
3656 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3657 return;
3658
3659 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3660 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3661 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3662 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3663 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3664 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3665 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3666 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3667}
3668
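/* Enable clock gating per engine according to hdev->clock_gating_mask */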
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003669static void gaudi_set_clock_gating(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003670{
3671 struct gaudi_device *gaudi = hdev->asic_specific;
3672 u32 qman_offset;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003673 bool enable;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003674 int i;
3675
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003676	/* If we are in a debug session, don't enable clock gating
3677	 * as it may interfere
3678 */
3679 if (hdev->in_debug)
3680 return;
3681
Ohad Sharabi4cb45082021-05-20 09:09:03 +03003682 if (hdev->asic_prop.fw_security_enabled)
Oded Gabbay0024c0942020-12-05 22:55:09 +02003683 return;
3684
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003685 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003686 enable = !!(hdev->clock_gating_mask &
3687 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003688
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003689 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003690 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3691 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003692 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003693 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003694 }
3695
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003696 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003697 enable = !!(hdev->clock_gating_mask &
3698 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003699
Oded Gabbayda5dfbb2021-02-06 19:34:59 +02003700		/* GC sends work to the DMA engine through the upper CP in DMA5,
3701		 * so clock gating must not be enabled in that DMA
3702 */
3703 if (i == GAUDI_HBM_DMA_4)
3704 enable = 0;
3705
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003706 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003707 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3708 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003709 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003710 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003711 }
3712
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003713 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3714 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3715 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003716
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003717 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3718 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3719 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003720
3721 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003722 enable = !!(hdev->clock_gating_mask &
3723 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003724
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003725 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003726 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003727 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003728 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003729
3730 qman_offset += TPC_QMAN_OFFSET;
3731 }
3732
3733 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3734}
3735
3736static void gaudi_disable_clock_gating(struct hl_device *hdev)
3737{
3738 struct gaudi_device *gaudi = hdev->asic_specific;
3739 u32 qman_offset;
3740 int i;
3741
Ohad Sharabi4cb45082021-05-20 09:09:03 +03003742 if (hdev->asic_prop.fw_security_enabled)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003743 return;
3744
3745 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3746 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3747 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3748
3749 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3750 }
3751
3752 WREG32(mmMME0_QM_CGM_CFG, 0);
3753 WREG32(mmMME0_QM_CGM_CFG1, 0);
3754 WREG32(mmMME2_QM_CGM_CFG, 0);
3755 WREG32(mmMME2_QM_CGM_CFG1, 0);
3756
3757 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3758 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3759 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3760
3761 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3762 }
3763
3764 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3765}
3766
3767static void gaudi_enable_timestamp(struct hl_device *hdev)
3768{
3769 /* Disable the timestamp counter */
3770 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3771
3772 /* Zero the lower/upper parts of the 64-bit counter */
3773 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3774 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3775
3776 /* Enable the counter */
3777 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3778}
3779
3780static void gaudi_disable_timestamp(struct hl_device *hdev)
3781{
3782 /* Disable the timestamp counter */
3783 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3784}
3785
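/*
 * Halt all compute engines: stop the QMANs, disable clock gating, stall
 * the engines, then disable the QMANs, the timestamp counter and MSI
 */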
3786static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3787{
Oded Gabbayc83c4172020-07-05 15:48:34 +03003788 u32 wait_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003789
3790 dev_info(hdev->dev,
3791 "Halting compute engines and disabling interrupts\n");
3792
Oded Gabbayc83c4172020-07-05 15:48:34 +03003793 if (hdev->pldm)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003794 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003795 else
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003796 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003797
Oded Gabbay3c681572020-11-02 21:10:39 +02003798 gaudi_stop_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003799 gaudi_stop_mme_qmans(hdev);
3800 gaudi_stop_tpc_qmans(hdev);
3801 gaudi_stop_hbm_dma_qmans(hdev);
3802 gaudi_stop_pci_dma_qmans(hdev);
3803
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003804 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003805
3806 msleep(wait_timeout_ms);
3807
3808 gaudi_pci_dma_stall(hdev);
3809 gaudi_hbm_dma_stall(hdev);
3810 gaudi_tpc_stall(hdev);
3811 gaudi_mme_stall(hdev);
3812
3813 msleep(wait_timeout_ms);
3814
Oded Gabbay3c681572020-11-02 21:10:39 +02003815 gaudi_disable_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003816 gaudi_disable_mme_qmans(hdev);
3817 gaudi_disable_tpc_qmans(hdev);
3818 gaudi_disable_hbm_dma_qmans(hdev);
3819 gaudi_disable_pci_dma_qmans(hdev);
3820
3821 gaudi_disable_timestamp(hdev);
3822
Oded Gabbay12ae3132020-07-03 20:58:23 +03003823 gaudi_disable_msi(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003824}
3825
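/*
 * Set the hop0 page-table address for every ASID, configure the MMU cache
 * management page, invalidate the MMU cache and enable the MMU
 */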
3826static int gaudi_mmu_init(struct hl_device *hdev)
3827{
3828 struct asic_fixed_properties *prop = &hdev->asic_prop;
3829 struct gaudi_device *gaudi = hdev->asic_specific;
3830 u64 hop0_addr;
3831 int rc, i;
3832
3833 if (!hdev->mmu_enable)
3834 return 0;
3835
3836 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3837 return 0;
3838
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003839 for (i = 0 ; i < prop->max_asid ; i++) {
3840 hop0_addr = prop->mmu_pgt_addr +
3841 (i * prop->mmu_hop_table_size);
3842
3843 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3844 if (rc) {
3845 dev_err(hdev->dev,
3846 "failed to set hop0 addr for asid %d\n", i);
3847 goto err;
3848 }
3849 }
3850
3851	/* init MMU cache management page */
3852 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3853 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3854
Tomer Tayar644883e2020-07-19 11:00:03 +03003855 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003856
3857 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3858 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3859
3860 WREG32(mmSTLB_HOP_CONFIGURATION,
3861 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3862
Omer Shpigelmancfd41762020-06-03 13:03:35 +03003863 /*
3864 * The H/W expects the first PI after init to be 1. After wraparound
3865 * we'll write 0.
3866 */
3867 gaudi->mmu_cache_inv_pi = 1;
3868
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003869 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3870
3871 return 0;
3872
3873err:
3874 return rc;
3875}
3876
3877static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3878{
3879 void __iomem *dst;
3880
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003881 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3882
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003883 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003884}
3885
3886static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3887{
3888 void __iomem *dst;
3889
3890 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3891
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003892 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003893}
3894
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003895static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3896{
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003897 struct dynamic_fw_load_mgr *dynamic_loader;
3898 struct cpu_dyn_regs *dyn_regs;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003899
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003900 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3901
3902 /*
3903	 * Here we set initial values for a few specific dynamic registers (before
3904	 * the first descriptor is read from the FW, these values have to be
3905	 * hard-coded). In later stages of the protocol, these values are updated
3906	 * automatically by reading the FW descriptor, so the data there is
3907	 * always up-to-date.
3908 */
3909 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3910 dyn_regs->kmd_msg_to_cpu =
3911 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3912 dyn_regs->cpu_cmd_status_to_host =
3913 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
Ohad Sharabib31e59b2021-04-22 10:01:22 +03003914
3915 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003916}
3917
3918static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3919{
3920 struct static_fw_load_mgr *static_loader;
3921
3922 static_loader = &hdev->fw_loader.static_loader;
3923
3924 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3925 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003926 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3927 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003928 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
Ohad Sharabie67a60402021-05-02 15:45:21 +03003929 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3930 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003931 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
Ohad Sharabie67a60402021-05-02 15:45:21 +03003932 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003933 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3934 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
Koby Elbaz69dbbba2021-06-17 17:04:16 +03003935 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
Koby Elbazb7a71fd2021-06-15 17:07:02 +03003936 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3937 GAUDI_PLDM_RESET_WAIT_MSEC :
3938 GAUDI_CPU_RESET_WAIT_MSEC;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003939}
3940
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003941static void gaudi_init_firmware_loader(struct hl_device *hdev)
3942{
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003943 struct asic_fixed_properties *prop = &hdev->asic_prop;
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003944 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3945
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003946 /* fill common fields */
Koby Elbaz3649eae2021-05-18 15:43:47 +03003947 fw_loader->linux_loaded = false;
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003948 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3949 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003950 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3951 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3952 fw_loader->skip_bmc = !hdev->bmc_enable;
Ohad Sharabi50f036d2021-04-11 15:26:28 +03003953 fw_loader->sram_bar_id = SRAM_BAR_ID;
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003954 fw_loader->dram_bar_id = HBM_BAR_ID;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003955
3956 if (prop->dynamic_fw_load)
3957 gaudi_init_dynamic_firmware_loader(hdev);
3958 else
3959 gaudi_init_static_firmware_loader(hdev);
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003960}
3961
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003962static int gaudi_init_cpu(struct hl_device *hdev)
3963{
3964 struct gaudi_device *gaudi = hdev->asic_specific;
3965 int rc;
3966
Ofir Bitton6a2f5d72021-02-15 13:23:04 +02003967 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003968 return 0;
3969
3970 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3971 return 0;
3972
3973 /*
3974	 * The device CPU works with 40-bit addresses.
3975 * This register sets the extension to 50 bits.
3976 */
Ohad Sharabi4cb45082021-05-20 09:09:03 +03003977 if (!hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03003978 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003979
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003980 rc = hl_fw_init_cpu(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003981
3982 if (rc)
3983 return rc;
3984
3985 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3986
3987 return 0;
3988}
3989
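/*
 * Pass the addresses and sizes of the CPU PQ, EQ and CQ to the device CPU,
 * notify it through the PI update interrupt and wait until it reports that
 * the queues are ready
 */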
3990static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3991{
Koby Elbaze591a492021-05-12 18:05:46 +03003992 struct cpu_dyn_regs *dyn_regs =
3993 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Ohad Sharabi5b6b7802021-02-02 13:33:34 +02003994 struct asic_fixed_properties *prop = &hdev->asic_prop;
Koby Elbaz81217362021-05-03 23:03:15 +03003995 struct gaudi_device *gaudi = hdev->asic_specific;
3996 u32 status, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003997 struct hl_eq *eq;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003998 struct hl_hw_queue *cpu_pq =
3999 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4000 int err;
4001
4002 if (!hdev->cpu_queues_enable)
4003 return 0;
4004
4005 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4006 return 0;
4007
4008 eq = &hdev->event_queue;
4009
4010 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4011 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4012
4013 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4014 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4015
4016 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4017 lower_32_bits(hdev->cpu_accessible_dma_address));
4018 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4019 upper_32_bits(hdev->cpu_accessible_dma_address));
4020
4021 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4022 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4023 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4024
4025 /* Used for EQ CI */
4026 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4027
4028 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4029
4030 if (gaudi->multi_msi_mode)
4031 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4032 else
4033 WREG32(mmCPU_IF_QUEUE_INIT,
4034 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4035
Koby Elbaz81217362021-05-03 23:03:15 +03004036 irq_handler_offset = prop->gic_interrupts_enable ?
4037 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Ofir Bitton5bc691d2021-05-25 22:09:13 +03004038 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
Koby Elbaz81217362021-05-03 23:03:15 +03004039
Ofir Bitton7d5ba002021-06-07 15:22:56 +03004040 WREG32(irq_handler_offset,
4041 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004042
4043 err = hl_poll_timeout(
4044 hdev,
4045 mmCPU_IF_QUEUE_INIT,
4046 status,
4047 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4048 1000,
4049 cpu_timeout);
4050
4051 if (err) {
4052 dev_err(hdev->dev,
Oded Gabbay6138bbe2020-09-04 20:18:16 +03004053 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004054 return -EIO;
4055 }
4056
Ohad Sharabi5b6b7802021-02-02 13:33:34 +02004057 /* update FW application security bits */
Ohad Sharabie67a60402021-05-02 15:45:21 +03004058 if (prop->fw_cpu_boot_dev_sts0_valid)
4059 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4060 if (prop->fw_cpu_boot_dev_sts1_valid)
4061 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
Ohad Sharabi5b6b7802021-02-02 13:33:34 +02004062
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004063 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4064 return 0;
4065}
4066
4067static void gaudi_pre_hw_init(struct hl_device *hdev)
4068{
4069 /* Perform read from the device to make sure device is up */
Oded Gabbay377182a2020-12-09 19:50:46 +02004070 RREG32(mmHW_STATE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004071
Ohad Sharabi4cb45082021-05-20 09:09:03 +03004072 if (!hdev->asic_prop.fw_security_enabled) {
Ofir Bittonc692dec2020-10-04 17:34:37 +03004073 /* Set the access through PCI bars (Linux driver only) as
4074 * secured
4075 */
4076 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4077 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4078 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
Oded Gabbay57799ce2020-09-13 15:51:28 +03004079
Ofir Bittonc692dec2020-10-04 17:34:37 +03004080 /* Perform read to flush the waiting writes to ensure
4081 * configuration was set in the device
4082 */
4083 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4084 }
Oded Gabbay57799ce2020-09-13 15:51:28 +03004085
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004086 /*
4087 * Let's mark in the H/W that we have reached this point. We check
4088 * this value in the reset_before_init function to understand whether
4089 * we need to reset the chip before doing H/W init. This register is
4090 * cleared by the H/W upon H/W reset
4091 */
4092 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004093}
4094
4095static int gaudi_hw_init(struct hl_device *hdev)
4096{
Ofir Bitton1dae12f2021-05-12 09:07:39 +03004097 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004098 int rc;
4099
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004100 gaudi_pre_hw_init(hdev);
4101
Ofir Bitton1dae12f2021-05-12 09:07:39 +03004102	/* If the iATU is configured by FW, the HBM BAR ALWAYS points to
4103	 * DRAM_PHYS_BASE. So we set it here, and if anyone tries to move it
4104	 * later to a different address, there will be an error
4105 */
4106 if (hdev->asic_prop.iatu_done_by_fw)
4107 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4108
4109 /*
4110	 * Before pushing u-boot/linux to the device, we need to set the HBM BAR
4111	 * to the base address of the DRAM
4112 */
4113 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4114 dev_err(hdev->dev,
4115 "failed to map HBM bar to DRAM base address\n");
4116 return -EIO;
4117 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004118
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004119 rc = gaudi_init_cpu(hdev);
4120 if (rc) {
4121 dev_err(hdev->dev, "failed to initialize CPU\n");
4122 return rc;
4123 }
4124
Oded Gabbay0024c0942020-12-05 22:55:09 +02004125	/* If clock gating was enabled in preboot, we need to disable it here
4126	 * before touching the MME/TPC registers.
4127	 * There is no need to take the clock gating mutex because when this
4128	 * function runs, no other relevant code can run
4129 */
4130 hdev->asic_funcs->disable_clock_gating(hdev);
4131
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004132 /* SRAM scrambler must be initialized after CPU is running from HBM */
4133 gaudi_init_scrambler_sram(hdev);
4134
4135 /* This is here just in case we are working without CPU */
4136 gaudi_init_scrambler_hbm(hdev);
4137
4138 gaudi_init_golden_registers(hdev);
4139
4140 rc = gaudi_mmu_init(hdev);
4141 if (rc)
4142 return rc;
4143
Omer Shpigelman3a3a5bf12020-05-11 10:45:12 +03004144 gaudi_init_security(hdev);
4145
Koby Elbaze591a492021-05-12 18:05:46 +03004146 gaudi_init_pci_dma_qmans(hdev);
4147
4148 gaudi_init_hbm_dma_qmans(hdev);
4149
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004150 gaudi_init_mme_qmans(hdev);
4151
4152 gaudi_init_tpc_qmans(hdev);
4153
Oded Gabbay3c681572020-11-02 21:10:39 +02004154 gaudi_init_nic_qmans(hdev);
4155
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004156 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004157
4158 gaudi_enable_timestamp(hdev);
4159
Oded Gabbay3c681572020-11-02 21:10:39 +02004160 /* MSI must be enabled before CPU queues and NIC are initialized */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004161 rc = gaudi_enable_msi(hdev);
4162 if (rc)
4163 goto disable_queues;
4164
4165 /* must be called after MSI was enabled */
4166 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4167 if (rc) {
4168 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4169 rc);
4170 goto disable_msi;
4171 }
4172
4173 /* Perform read from the device to flush all configuration */
Oded Gabbay377182a2020-12-09 19:50:46 +02004174 RREG32(mmHW_STATE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004175
4176 return 0;
4177
4178disable_msi:
4179 gaudi_disable_msi(hdev);
4180disable_queues:
4181 gaudi_disable_mme_qmans(hdev);
4182 gaudi_disable_pci_dma_qmans(hdev);
4183
4184 return rc;
4185}
4186
4187static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4188{
Koby Elbaze591a492021-05-12 18:05:46 +03004189 struct cpu_dyn_regs *dyn_regs =
4190 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Koby Elbaz81217362021-05-03 23:03:15 +03004191 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
Oded Gabbaya60d0752021-05-23 19:00:49 +03004192 struct gaudi_device *gaudi = hdev->asic_specific;
4193 bool driver_performs_reset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004194
Oded Gabbay12ae3132020-07-03 20:58:23 +03004195 if (!hard_reset) {
4196 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4197 return;
4198 }
4199
Oded Gabbayc83c4172020-07-05 15:48:34 +03004200 if (hdev->pldm) {
Oded Gabbay12ae3132020-07-03 20:58:23 +03004201 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03004202 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4203 } else {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004204 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03004205 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4206 }
4207
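	/*
	 * The reset is driven either by the driver or by the firmware: the
	 * driver programs the reset configuration registers and asserts
	 * SW_ALL_RST only when firmware security is off and the firmware does
	 * not own the hard-reset flow.
	 */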
Oded Gabbaya60d0752021-05-23 19:00:49 +03004208 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4209 !hdev->asic_prop.hard_reset_done_by_fw);
4210
Oded Gabbayc83c4172020-07-05 15:48:34 +03004211 /* Set the device to handle FLR by H/W, as we are about to put the
4212 * device CPU into halt mode
4213 */
Oded Gabbaya60d0752021-05-23 19:00:49 +03004214 if (driver_performs_reset)
Ofir Bittonb90c8942020-11-08 12:59:04 +02004215 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
Oded Gabbayc83c4172020-07-05 15:48:34 +03004216 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4217
Oded Gabbaya60d0752021-05-23 19:00:49 +03004218 /* If Linux is loaded on the device CPU, we need to communicate with it
4219 * via the GIC. Otherwise, we use COMMS, or the MSG_TO_CPU registers in
4220 * case of old F/Ws
Oded Gabbayc83c4172020-07-05 15:48:34 +03004221 */
Koby Elbaz3649eae2021-05-18 15:43:47 +03004222 if (hdev->fw_loader.linux_loaded) {
4223 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4224 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Ofir Bitton5bc691d2021-05-25 22:09:13 +03004225 le32_to_cpu(dyn_regs->gic_host_halt_irq);
Koby Elbaz81217362021-05-03 23:03:15 +03004226
Ofir Bitton7d5ba002021-06-07 15:22:56 +03004227 WREG32(irq_handler_offset,
4228 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
Oded Gabbaya60d0752021-05-23 19:00:49 +03004229 } else {
4230 if (hdev->asic_prop.hard_reset_done_by_fw)
Koby Elbazb7a71fd2021-06-15 17:07:02 +03004231 hl_fw_ask_hard_reset_without_linux(hdev);
Oded Gabbaya60d0752021-05-23 19:00:49 +03004232 else
Koby Elbazb7a71fd2021-06-15 17:07:02 +03004233 hl_fw_ask_halt_machine_without_linux(hdev);
Koby Elbaz3649eae2021-05-18 15:43:47 +03004234 }
Oded Gabbayc83c4172020-07-05 15:48:34 +03004235
Oded Gabbaya60d0752021-05-23 19:00:49 +03004236 if (driver_performs_reset) {
Oded Gabbaya63c3fb2020-11-26 18:11:05 +02004237
4238 /* Configure the reset registers. Must be done as early as
4239 * possible in case we fail during H/W initialization
4240 */
4241 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4242 (CFG_RST_H_DMA_MASK |
4243 CFG_RST_H_MME_MASK |
4244 CFG_RST_H_SM_MASK |
4245 CFG_RST_H_TPC_7_MASK));
4246
4247 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4248
4249 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4250 (CFG_RST_H_HBM_MASK |
4251 CFG_RST_H_TPC_7_MASK |
4252 CFG_RST_H_NIC_MASK |
4253 CFG_RST_H_SM_MASK |
4254 CFG_RST_H_DMA_MASK |
4255 CFG_RST_H_MME_MASK |
4256 CFG_RST_H_CPU_MASK |
4257 CFG_RST_H_MMU_MASK));
4258
4259 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4260 (CFG_RST_L_IF_MASK |
4261 CFG_RST_L_PSOC_MASK |
4262 CFG_RST_L_TPC_MASK));
4263
Ofir Bittonb90c8942020-11-08 12:59:04 +02004264 msleep(cpu_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004265
Ofir Bittonb90c8942020-11-08 12:59:04 +02004266 /* Tell ASIC not to re-initialize PCIe */
4267 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004268
Ofir Bittonb90c8942020-11-08 12:59:04 +02004269 /* Restart BTL/BLR upon hard-reset */
Oded Gabbaya60d0752021-05-23 19:00:49 +03004270 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004271
Ofir Bittonb90c8942020-11-08 12:59:04 +02004272 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
Oded Gabbay12ae3132020-07-03 20:58:23 +03004273 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
Ofir Bittonb90c8942020-11-08 12:59:04 +02004274
Oded Gabbay13d0ee12020-12-06 23:48:45 +02004275 dev_info(hdev->dev,
4276 "Issued HARD reset command, going to wait %dms\n",
4277 reset_timeout_ms);
4278 } else {
4279 dev_info(hdev->dev,
4280 "Firmware performs HARD reset, going to wait %dms\n",
4281 reset_timeout_ms);
4282 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004283
4284 /*
4285 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4286 * itself is in reset. We need to wait until the reset is deasserted
4287 */
4288 msleep(reset_timeout_ms);
4289
4290 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4291 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4292 dev_err(hdev->dev,
4293 "Timeout while waiting for device to reset 0x%x\n",
4294 status);
4295
farah kassabrieb10b892020-10-14 15:17:36 +03004296 if (gaudi) {
4297 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4298 HW_CAP_HBM | HW_CAP_PCI_DMA |
4299 HW_CAP_MME | HW_CAP_TPC_MASK |
4300 HW_CAP_HBM_DMA | HW_CAP_PLL |
4301 HW_CAP_NIC_MASK | HW_CAP_MMU |
4302 HW_CAP_SRAM_SCRAMBLER |
4303 HW_CAP_HBM_SCRAMBLER |
4304 HW_CAP_CLK_GATE);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004305
farah kassabrieb10b892020-10-14 15:17:36 +03004306 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
Oded Gabbaya60d0752021-05-23 19:00:49 +03004307
Koby Elbazb7a71fd2021-06-15 17:07:02 +03004308 hdev->device_cpu_is_halted = false;
farah kassabrieb10b892020-10-14 15:17:36 +03004309 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004310}
4311
4312static int gaudi_suspend(struct hl_device *hdev)
4313{
4314 int rc;
4315
Oded Gabbay2f553422020-08-15 16:28:10 +03004316 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004317 if (rc)
4318 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4319
4320 return rc;
4321}
4322
4323static int gaudi_resume(struct hl_device *hdev)
4324{
4325 return gaudi_init_iatu(hdev);
4326}
4327
Zvika Yehudai1ee8e2b2021-07-06 13:50:32 +03004328static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
Hillf Danton0db57532020-08-23 07:32:42 +08004329 void *cpu_addr, dma_addr_t dma_addr, size_t size)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004330{
4331 int rc;
4332
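	/*
	 * The buffer was allocated through the coherent DMA API, so it is
	 * mapped to user-space with dma_mmap_coherent(). The handle we got is
	 * the device-side address, hence HOST_PHYS_BASE is subtracted to get
	 * back the raw handle the DMA API knows about.
	 */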
4333 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4334 VM_DONTCOPY | VM_NORESERVE;
4335
Oded Gabbaya9d4ef62021-01-11 13:49:38 +02004336 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4337 (dma_addr - HOST_PHYS_BASE), size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004338 if (rc)
Hillf Danton0db57532020-08-23 07:32:42 +08004339 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004340
4341 return rc;
4342}
4343
4344static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4345{
Koby Elbaze591a492021-05-12 18:05:46 +03004346 struct cpu_dyn_regs *dyn_regs =
4347 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Koby Elbaz81217362021-05-03 23:03:15 +03004348 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004349 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004350 bool invalid_queue = false;
Ofir Bittona3972582021-05-24 22:58:44 +03004351 int dma_id;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004352
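	/*
	 * Every QMAN exposes four PQ_PI doorbell registers, one per stream,
	 * laid out 4 bytes apart. The switch below picks the engine's QMAN
	 * register block and offsets into it by (stream index) * 4.
	 */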
4353 switch (hw_queue_id) {
4354 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4355 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4356 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4357 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4358 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4359 break;
4360
4361 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4362 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4363 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4364 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4365 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4366 break;
4367
4368 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4369 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4370 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4371 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4372 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4373 break;
4374
4375 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4376 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4377 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4378 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4379 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4380 break;
4381
4382 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4383 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4384 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4385 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4386 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4387 break;
4388
4389 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004390 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4391 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4392 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4393 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4394 break;
4395
Ofir Bitton0940cab2020-08-31 08:52:56 +03004396 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004397 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4398 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4399 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4400 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4401 break;
4402
Ofir Bitton0940cab2020-08-31 08:52:56 +03004403 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4404 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4405 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4406 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4407 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4408 break;
4409
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004410 case GAUDI_QUEUE_ID_CPU_PQ:
4411 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4412 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4413 else
4414 invalid_queue = true;
4415 break;
4416
4417 case GAUDI_QUEUE_ID_MME_0_0:
4418 db_reg_offset = mmMME2_QM_PQ_PI_0;
4419 break;
4420
4421 case GAUDI_QUEUE_ID_MME_0_1:
4422 db_reg_offset = mmMME2_QM_PQ_PI_1;
4423 break;
4424
4425 case GAUDI_QUEUE_ID_MME_0_2:
4426 db_reg_offset = mmMME2_QM_PQ_PI_2;
4427 break;
4428
4429 case GAUDI_QUEUE_ID_MME_0_3:
4430 db_reg_offset = mmMME2_QM_PQ_PI_3;
4431 break;
4432
4433 case GAUDI_QUEUE_ID_MME_1_0:
4434 db_reg_offset = mmMME0_QM_PQ_PI_0;
4435 break;
4436
4437 case GAUDI_QUEUE_ID_MME_1_1:
4438 db_reg_offset = mmMME0_QM_PQ_PI_1;
4439 break;
4440
4441 case GAUDI_QUEUE_ID_MME_1_2:
4442 db_reg_offset = mmMME0_QM_PQ_PI_2;
4443 break;
4444
4445 case GAUDI_QUEUE_ID_MME_1_3:
4446 db_reg_offset = mmMME0_QM_PQ_PI_3;
4447 break;
4448
4449 case GAUDI_QUEUE_ID_TPC_0_0:
4450 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4451 break;
4452
4453 case GAUDI_QUEUE_ID_TPC_0_1:
4454 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4455 break;
4456
4457 case GAUDI_QUEUE_ID_TPC_0_2:
4458 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4459 break;
4460
4461 case GAUDI_QUEUE_ID_TPC_0_3:
4462 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4463 break;
4464
4465 case GAUDI_QUEUE_ID_TPC_1_0:
4466 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4467 break;
4468
4469 case GAUDI_QUEUE_ID_TPC_1_1:
4470 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4471 break;
4472
4473 case GAUDI_QUEUE_ID_TPC_1_2:
4474 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4475 break;
4476
4477 case GAUDI_QUEUE_ID_TPC_1_3:
4478 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4479 break;
4480
4481 case GAUDI_QUEUE_ID_TPC_2_0:
4482 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4483 break;
4484
4485 case GAUDI_QUEUE_ID_TPC_2_1:
4486 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4487 break;
4488
4489 case GAUDI_QUEUE_ID_TPC_2_2:
4490 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4491 break;
4492
4493 case GAUDI_QUEUE_ID_TPC_2_3:
4494 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4495 break;
4496
4497 case GAUDI_QUEUE_ID_TPC_3_0:
4498 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4499 break;
4500
4501 case GAUDI_QUEUE_ID_TPC_3_1:
4502 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4503 break;
4504
4505 case GAUDI_QUEUE_ID_TPC_3_2:
4506 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4507 break;
4508
4509 case GAUDI_QUEUE_ID_TPC_3_3:
4510 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4511 break;
4512
4513 case GAUDI_QUEUE_ID_TPC_4_0:
4514 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4515 break;
4516
4517 case GAUDI_QUEUE_ID_TPC_4_1:
4518 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4519 break;
4520
4521 case GAUDI_QUEUE_ID_TPC_4_2:
4522 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4523 break;
4524
4525 case GAUDI_QUEUE_ID_TPC_4_3:
4526 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4527 break;
4528
4529 case GAUDI_QUEUE_ID_TPC_5_0:
4530 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4531 break;
4532
4533 case GAUDI_QUEUE_ID_TPC_5_1:
4534 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4535 break;
4536
4537 case GAUDI_QUEUE_ID_TPC_5_2:
4538 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4539 break;
4540
4541 case GAUDI_QUEUE_ID_TPC_5_3:
4542 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4543 break;
4544
4545 case GAUDI_QUEUE_ID_TPC_6_0:
4546 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4547 break;
4548
4549 case GAUDI_QUEUE_ID_TPC_6_1:
4550 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4551 break;
4552
4553 case GAUDI_QUEUE_ID_TPC_6_2:
4554 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4555 break;
4556
4557 case GAUDI_QUEUE_ID_TPC_6_3:
4558 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4559 break;
4560
4561 case GAUDI_QUEUE_ID_TPC_7_0:
4562 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4563 break;
4564
4565 case GAUDI_QUEUE_ID_TPC_7_1:
4566 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4567 break;
4568
4569 case GAUDI_QUEUE_ID_TPC_7_2:
4570 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4571 break;
4572
4573 case GAUDI_QUEUE_ID_TPC_7_3:
4574 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4575 break;
4576
Ofir Bittona3972582021-05-24 22:58:44 +03004577 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4578 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4579 invalid_queue = true;
4580
4581 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4582 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004583 break;
4584
Ofir Bittona3972582021-05-24 22:58:44 +03004585 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4586 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4587 invalid_queue = true;
4588
4589 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4590 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004591 break;
4592
Ofir Bittona3972582021-05-24 22:58:44 +03004593 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4594 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4595 invalid_queue = true;
4596
4597 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4598 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004599 break;
4600
Ofir Bittona3972582021-05-24 22:58:44 +03004601 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4602 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4603 invalid_queue = true;
4604
4605 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4606 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004607 break;
4608
Ofir Bittona3972582021-05-24 22:58:44 +03004609 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4610 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4611 invalid_queue = true;
4612
4613 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4614 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004615 break;
4616
Ofir Bittona3972582021-05-24 22:58:44 +03004617 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4618 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4619 invalid_queue = true;
4620
4621 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4622 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004623 break;
4624
Ofir Bittona3972582021-05-24 22:58:44 +03004625 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4626 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4627 invalid_queue = true;
4628
4629 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4630 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004631 break;
4632
Ofir Bittona3972582021-05-24 22:58:44 +03004633 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4634 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4635 invalid_queue = true;
4636
4637 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4638 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004639 break;
4640
Ofir Bittona3972582021-05-24 22:58:44 +03004641 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4642 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4643 invalid_queue = true;
4644
4645 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4646 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004647 break;
4648
Ofir Bittona3972582021-05-24 22:58:44 +03004649 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4650 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4651 invalid_queue = true;
Oded Gabbay3c681572020-11-02 21:10:39 +02004652
Ofir Bittona3972582021-05-24 22:58:44 +03004653 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4654 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004655 break;
4656
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004657 default:
4658 invalid_queue = true;
4659 }
4660
4661 if (invalid_queue) {
4662 /* Should never get here */
4663 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4664 hw_queue_id);
4665 return;
4666 }
4667
4668 db_value = pi;
4669
4670 /* ring the doorbell */
4671 WREG32(db_reg_offset, db_value);
4672
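	/*
	 * For the CPU queue the doorbell alone is not enough: the device CPU
	 * is also poked, either through the GIC SETSPI register or through the
	 * host-PI-update IRQ register published in the dynamic CPU regs,
	 * depending on whether GIC interrupts are enabled.
	 */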
Ofir Bitton5dbd7b42021-01-28 16:30:25 +02004673 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4674 /* make sure device CPU will read latest data from host */
4675 mb();
Koby Elbaz81217362021-05-03 23:03:15 +03004676
4677 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03004678 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Ofir Bitton5bc691d2021-05-25 22:09:13 +03004679 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
Koby Elbaz81217362021-05-03 23:03:15 +03004680
Ofir Bitton7d5ba002021-06-07 15:22:56 +03004681 WREG32(irq_handler_offset,
4682 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
Ofir Bitton5dbd7b42021-01-28 16:30:25 +02004683 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004684}
4685
4686static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4687 struct hl_bd *bd)
4688{
4689 __le64 *pbd = (__le64 *) bd;
4690
4691 /* The QMANs are in host memory, so a simple copy suffices */
4692 pqe[0] = pbd[0];
4693 pqe[1] = pbd[1];
4694}
4695
4696static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4697 dma_addr_t *dma_handle, gfp_t flags)
4698{
4699 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4700 dma_handle, flags);
4701
4702 /* Shift to the device's base physical address of host memory */
4703 if (kernel_addr)
4704 *dma_handle += HOST_PHYS_BASE;
4705
4706 return kernel_addr;
4707}
4708
4709static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4710 void *cpu_addr, dma_addr_t dma_handle)
4711{
4712 /* Cancel the device's base physical address of host memory */
4713 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4714
4715 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4716}
4717
farah kassabri03df1362020-05-06 11:17:38 +03004718static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4719{
4720 struct asic_fixed_properties *prop = &hdev->asic_prop;
4721 u64 cur_addr = DRAM_BASE_ADDR_USER;
4722 u32 val;
4723 u32 chunk_size;
4724 int rc, dma_id;
4725
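	/*
	 * Scrub the user DRAM range in chunks of up to 2GB, handing one chunk
	 * to each DMA core in memset mode (source value 0), then poll every
	 * core's STS0 busy bit before moving on to the next batch of chunks.
	 */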
4726 while (cur_addr < prop->dram_end_address) {
4727 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4728 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4729
4730 chunk_size =
4731 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4732
4733 dev_dbg(hdev->dev,
4734 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4735 cur_addr, cur_addr + chunk_size);
4736
4737 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4738 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4739 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4740 lower_32_bits(cur_addr));
4741 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4742 upper_32_bits(cur_addr));
4743 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4744 chunk_size);
4745 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4746 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4747 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4748
4749 cur_addr += chunk_size;
4750
4751 if (cur_addr == prop->dram_end_address)
4752 break;
4753 }
4754
4755 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4756 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4757
4758 rc = hl_poll_timeout(
4759 hdev,
4760 mmDMA0_CORE_STS0 + dma_offset,
4761 val,
4762 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4763 1000,
4764 HBM_SCRUBBING_TIMEOUT_US);
4765
4766 if (rc) {
4767 dev_err(hdev->dev,
4768 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4769 dma_id);
4770 return -EIO;
4771 }
4772 }
4773 }
4774
4775 return 0;
4776}
4777
4778static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4779{
4780 struct asic_fixed_properties *prop = &hdev->asic_prop;
4781 struct gaudi_device *gaudi = hdev->asic_specific;
farah kassabri03df1362020-05-06 11:17:38 +03004782 int rc = 0;
4783 u64 val = 0;
4784
4785 if (!hdev->memory_scrub)
4786 return 0;
4787
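	/*
	 * A zero addr/size request means "scrub everything": wait for the
	 * device to go idle, memset the user SRAM through the QMAN0 path and
	 * then scrub the HBM using all DMA cores in parallel.
	 */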
4788 if (!addr && !size) {
4789 /* Wait till device is idle */
4790 rc = hl_poll_timeout(
4791 hdev,
4792 mmDMA0_CORE_STS0/* dummy */,
4793 val/* dummy */,
Ohad Sharabicf303392021-01-17 16:01:56 +02004794 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4795 0, NULL)),
farah kassabri03df1362020-05-06 11:17:38 +03004796 1000,
4797 HBM_SCRUBBING_TIMEOUT_US);
4798 if (rc) {
4799 dev_err(hdev->dev, "waiting for idle timeout\n");
4800 return -EIO;
4801 }
4802
4803 /* Scrub SRAM */
4804 addr = prop->sram_user_base_address;
4805 size = hdev->pldm ? 0x10000 :
4806 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4807 val = 0x7777777777777777ull;
4808
4809 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4810 if (rc) {
4811 dev_err(hdev->dev,
4812 "Failed to clear SRAM in mem scrub all\n");
4813 return rc;
4814 }
4815
4816 mutex_lock(&gaudi->clk_gate_mutex);
4817 hdev->asic_funcs->disable_clock_gating(hdev);
4818
4819 /* Scrub HBM using all DMA channels in parallel */
4820 rc = gaudi_hbm_scrubbing(hdev);
4821 if (rc)
4822 dev_err(hdev->dev,
4823 "Failed to clear HBM in mem scrub all\n");
4824
4825 hdev->asic_funcs->set_clock_gating(hdev);
4826 mutex_unlock(&gaudi->clk_gate_mutex);
4827 }
4828
4829 return rc;
4830}
4831
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004832static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4833 u32 queue_id, dma_addr_t *dma_handle,
4834 u16 *queue_len)
4835{
4836 struct gaudi_device *gaudi = hdev->asic_specific;
4837 struct gaudi_internal_qman_info *q;
4838
4839 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4840 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4841 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4842 return NULL;
4843 }
4844
4845 q = &gaudi->internal_qmans[queue_id];
4846 *dma_handle = q->pq_dma_addr;
4847 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4848
4849 return q->pq_kernel_addr;
4850}
4851
4852static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
Alon Mizrahi439bc472020-11-10 13:49:10 +02004853 u16 len, u32 timeout, u64 *result)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004854{
4855 struct gaudi_device *gaudi = hdev->asic_specific;
4856
4857 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4858 if (result)
4859 *result = 0;
4860 return 0;
4861 }
4862
Oded Gabbay788cacf2020-07-07 17:30:13 +03004863 if (!timeout)
4864 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4865
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004866 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4867 timeout, result);
4868}
4869
4870static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4871{
4872 struct packet_msg_prot *fence_pkt;
4873 dma_addr_t pkt_dma_addr;
4874 u32 fence_val, tmp, timeout_usec;
4875 dma_addr_t fence_dma_addr;
4876 u32 *fence_ptr;
4877 int rc;
4878
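	/*
	 * The queue test allocates a 4-byte fence in host memory, pushes a
	 * single MSG_PROT packet through the tested queue that writes
	 * GAUDI_QMAN0_FENCE_VAL to that fence, and then polls the fence until
	 * the value shows up or the timeout expires.
	 */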
4879 if (hdev->pldm)
4880 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4881 else
4882 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4883
4884 fence_val = GAUDI_QMAN0_FENCE_VAL;
4885
4886 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4887 &fence_dma_addr);
4888 if (!fence_ptr) {
4889 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004890 "Failed to allocate memory for H/W queue %d testing\n",
4891 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004892 return -ENOMEM;
4893 }
4894
4895 *fence_ptr = 0;
4896
4897 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4898 sizeof(struct packet_msg_prot),
4899 GFP_KERNEL, &pkt_dma_addr);
4900 if (!fence_pkt) {
4901 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004902 "Failed to allocate packet for H/W queue %d testing\n",
4903 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004904 rc = -ENOMEM;
4905 goto free_fence_ptr;
4906 }
4907
Oded Gabbay65887292020-08-12 11:21:01 +03004908 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4909 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4910 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4911
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004912 fence_pkt->ctl = cpu_to_le32(tmp);
4913 fence_pkt->value = cpu_to_le32(fence_val);
4914 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4915
4916 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4917 sizeof(struct packet_msg_prot),
4918 pkt_dma_addr);
4919 if (rc) {
4920 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004921 "Failed to send fence packet to H/W queue %d\n",
4922 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004923 goto free_pkt;
4924 }
4925
4926 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4927 1000, timeout_usec, true);
4928
4929 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4930
4931 if (rc == -ETIMEDOUT) {
4932 dev_err(hdev->dev,
4933 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4934 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4935 rc = -EIO;
4936 }
4937
4938free_pkt:
4939 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4940 pkt_dma_addr);
4941free_fence_ptr:
4942 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4943 fence_dma_addr);
4944 return rc;
4945}
4946
4947static int gaudi_test_cpu_queue(struct hl_device *hdev)
4948{
4949 struct gaudi_device *gaudi = hdev->asic_specific;
4950
4951 /*
4952 * Check the capability here because send_cpu_message() won't update
4953 * the result value if the capability is not set
4954 */
4955 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4956 return 0;
4957
4958 return hl_fw_test_cpu_queue(hdev);
4959}
4960
4961static int gaudi_test_queues(struct hl_device *hdev)
4962{
4963 int i, rc, ret_val = 0;
4964
Ofir Bitton3abc99b2020-06-23 14:50:39 +03004965 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004966 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4967 rc = gaudi_test_queue(hdev, i);
4968 if (rc)
4969 ret_val = -EINVAL;
4970 }
4971 }
4972
4973 rc = gaudi_test_cpu_queue(hdev);
4974 if (rc)
4975 ret_val = -EINVAL;
4976
4977 return ret_val;
4978}
4979
4980static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4981 gfp_t mem_flags, dma_addr_t *dma_handle)
4982{
4983 void *kernel_addr;
4984
4985 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4986 return NULL;
4987
4988 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4989
4990 /* Shift to the device's base physical address of host memory */
4991 if (kernel_addr)
4992 *dma_handle += HOST_PHYS_BASE;
4993
4994 return kernel_addr;
4995}
4996
4997static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4998 dma_addr_t dma_addr)
4999{
5000 /* Cancel the device's base physical address of host memory */
5001 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5002
5003 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5004}
5005
5006static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5007 size_t size, dma_addr_t *dma_handle)
5008{
5009 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5010}
5011
5012static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5013 size_t size, void *vaddr)
5014{
5015 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5016}
5017
5018static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5019 int nents, enum dma_data_direction dir)
5020{
5021 struct scatterlist *sg;
5022 int i;
5023
5024 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5025 return -ENOMEM;
5026
5027 /* Shift to the device's base physical address of host memory */
5028 for_each_sg(sgl, sg, nents, i)
5029 sg->dma_address += HOST_PHYS_BASE;
5030
5031 return 0;
5032}
5033
5034static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5035 int nents, enum dma_data_direction dir)
5036{
5037 struct scatterlist *sg;
5038 int i;
5039
5040 /* Cancel the device's base physical address of host memory */
5041 for_each_sg(sgl, sg, nents, i)
5042 sg->dma_address -= HOST_PHYS_BASE;
5043
5044 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5045}
5046
5047static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5048 struct sg_table *sgt)
5049{
5050 struct scatterlist *sg, *sg_next_iter;
5051 u32 count, dma_desc_cnt;
5052 u64 len, len_next;
5053 dma_addr_t addr, addr_next;
5054
5055 dma_desc_cnt = 0;
5056
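	/*
	 * Merge physically contiguous SG entries as long as the combined
	 * length still fits in DMA_MAX_TRANSFER_SIZE; each resulting run will
	 * be described by one LIN_DMA packet in the patched CB.
	 */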
5057 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5058
5059 len = sg_dma_len(sg);
5060 addr = sg_dma_address(sg);
5061
5062 if (len == 0)
5063 break;
5064
5065 while ((count + 1) < sgt->nents) {
5066 sg_next_iter = sg_next(sg);
5067 len_next = sg_dma_len(sg_next_iter);
5068 addr_next = sg_dma_address(sg_next_iter);
5069
5070 if (len_next == 0)
5071 break;
5072
5073 if ((addr + len == addr_next) &&
5074 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5075 len += len_next;
5076 count++;
5077 sg = sg_next_iter;
5078 } else {
5079 break;
5080 }
5081 }
5082
5083 dma_desc_cnt++;
5084 }
5085
5086 return dma_desc_cnt * sizeof(struct packet_lin_dma);
5087}
5088
5089static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5090 struct hl_cs_parser *parser,
5091 struct packet_lin_dma *user_dma_pkt,
5092 u64 addr, enum dma_data_direction dir)
5093{
5094 struct hl_userptr *userptr;
5095 int rc;
5096
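	/*
	 * Reuse an existing pinning of this user range if it is already on the
	 * job's userptr list. Otherwise pin it, DMA-map the resulting SG table
	 * and account for the LIN_DMA descriptors it will add to the patched
	 * CB.
	 */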
5097 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5098 parser->job_userptr_list, &userptr))
5099 goto already_pinned;
5100
Ofir Bittond5eb8372021-02-14 15:35:56 +02005101 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005102 if (!userptr)
5103 return -ENOMEM;
5104
5105 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5106 userptr);
5107 if (rc)
5108 goto free_userptr;
5109
5110 list_add_tail(&userptr->job_node, parser->job_userptr_list);
5111
5112 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5113 userptr->sgt->nents, dir);
5114 if (rc) {
5115 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5116 goto unpin_memory;
5117 }
5118
5119 userptr->dma_mapped = true;
5120 userptr->dir = dir;
5121
5122already_pinned:
5123 parser->patched_cb_size +=
5124 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5125
5126 return 0;
5127
5128unpin_memory:
Koby Elbazf5eb7bf2021-06-09 21:43:52 +03005129 list_del(&userptr->job_node);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005130 hl_unpin_host_memory(hdev, userptr);
5131free_userptr:
5132 kfree(userptr);
5133 return rc;
5134}
5135
5136static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5137 struct hl_cs_parser *parser,
5138 struct packet_lin_dma *user_dma_pkt,
5139 bool src_in_host)
5140{
5141 enum dma_data_direction dir;
5142 bool skip_host_mem_pin = false, user_memset;
5143 u64 addr;
5144 int rc = 0;
5145
5146 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5147 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5148 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5149
5150 if (src_in_host) {
5151 if (user_memset)
5152 skip_host_mem_pin = true;
5153
5154 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5155 dir = DMA_TO_DEVICE;
5156 addr = le64_to_cpu(user_dma_pkt->src_addr);
5157 } else {
5158 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5159 dir = DMA_FROM_DEVICE;
5160 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5161 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5162 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5163 }
5164
5165 if (skip_host_mem_pin)
5166 parser->patched_cb_size += sizeof(*user_dma_pkt);
5167 else
5168 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5169 addr, dir);
5170
5171 return rc;
5172}
5173
5174static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5175 struct hl_cs_parser *parser,
5176 struct packet_lin_dma *user_dma_pkt)
5177{
5178 bool src_in_host = false;
5179 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5180 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5181 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5182
5183 dev_dbg(hdev->dev, "DMA packet details:\n");
5184 dev_dbg(hdev->dev, "source == 0x%llx\n",
5185 le64_to_cpu(user_dma_pkt->src_addr));
5186 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5187 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5188
5189 /*
5190 * Special handling for DMA with size 0. Bypass all validations
5191 * because no transactions will be done except for WR_COMP, which
5192 * is not a security issue
5193 */
5194 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5195 parser->patched_cb_size += sizeof(*user_dma_pkt);
5196 return 0;
5197 }
5198
5199 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5200 src_in_host = true;
5201
5202 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5203 src_in_host);
5204}
5205
Oded Gabbay64536ab2020-05-27 12:38:16 +03005206static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5207 struct hl_cs_parser *parser,
5208 struct packet_load_and_exe *user_pkt)
5209{
5210 u32 cfg;
5211
5212 cfg = le32_to_cpu(user_pkt->cfg);
5213
5214 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5215 dev_err(hdev->dev,
5216 "User not allowed to use Load and Execute\n");
5217 return -EPERM;
5218 }
5219
5220 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5221
5222 return 0;
5223}
5224
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005225static int gaudi_validate_cb(struct hl_device *hdev,
5226 struct hl_cs_parser *parser, bool is_mmu)
5227{
5228 u32 cb_parsed_length = 0;
5229 int rc = 0;
5230
5231 parser->patched_cb_size = 0;
5232
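	/*
	 * Walk the user CB packet by packet: privileged packets (MSG_PROT,
	 * CP_DMA, STOP, WREG_BULK) are rejected, LIN_DMA and LOAD_AND_EXE
	 * packets get extra validation, and every other packet just
	 * contributes its size to the patched CB.
	 */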
5233 /* cb_user_size is more than 0 so the loop will always be executed */
5234 while (cb_parsed_length < parser->user_cb_size) {
5235 enum packet_id pkt_id;
5236 u16 pkt_size;
5237 struct gaudi_packet *user_pkt;
5238
Arnd Bergmann82948e62020-10-26 17:08:06 +01005239 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005240
5241 pkt_id = (enum packet_id) (
5242 (le64_to_cpu(user_pkt->header) &
5243 PACKET_HEADER_PACKET_ID_MASK) >>
5244 PACKET_HEADER_PACKET_ID_SHIFT);
5245
Ofir Bittonbc75be22020-07-30 14:56:38 +03005246 if (!validate_packet_id(pkt_id)) {
5247 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5248 rc = -EINVAL;
5249 break;
5250 }
5251
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005252 pkt_size = gaudi_packet_sizes[pkt_id];
5253 cb_parsed_length += pkt_size;
5254 if (cb_parsed_length > parser->user_cb_size) {
5255 dev_err(hdev->dev,
5256 "packet 0x%x is out of CB boundary\n", pkt_id);
5257 rc = -EINVAL;
5258 break;
5259 }
5260
5261 switch (pkt_id) {
5262 case PACKET_MSG_PROT:
5263 dev_err(hdev->dev,
5264 "User not allowed to use MSG_PROT\n");
5265 rc = -EPERM;
5266 break;
5267
5268 case PACKET_CP_DMA:
5269 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5270 rc = -EPERM;
5271 break;
5272
5273 case PACKET_STOP:
5274 dev_err(hdev->dev, "User not allowed to use STOP\n");
5275 rc = -EPERM;
5276 break;
5277
Oded Gabbay2edc66e2020-07-03 19:28:54 +03005278 case PACKET_WREG_BULK:
5279 dev_err(hdev->dev,
5280 "User not allowed to use WREG_BULK\n");
5281 rc = -EPERM;
5282 break;
5283
Oded Gabbay64536ab2020-05-27 12:38:16 +03005284 case PACKET_LOAD_AND_EXE:
5285 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5286 (struct packet_load_and_exe *) user_pkt);
5287 break;
5288
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005289 case PACKET_LIN_DMA:
5290 parser->contains_dma_pkt = true;
5291 if (is_mmu)
5292 parser->patched_cb_size += pkt_size;
5293 else
5294 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5295 (struct packet_lin_dma *) user_pkt);
5296 break;
5297
5298 case PACKET_WREG_32:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005299 case PACKET_MSG_LONG:
5300 case PACKET_MSG_SHORT:
5301 case PACKET_REPEAT:
5302 case PACKET_FENCE:
5303 case PACKET_NOP:
5304 case PACKET_ARB_POINT:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005305 parser->patched_cb_size += pkt_size;
5306 break;
5307
5308 default:
5309 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5310 pkt_id);
5311 rc = -EINVAL;
5312 break;
5313 }
5314
5315 if (rc)
5316 break;
5317 }
5318
5319 /*
5320 * The new CB should have space at the end for two MSG_PROT packets:
5321 * 1. A packet that will act as a completion packet
5322 * 2. A packet that will generate an MSI-X interrupt
5323 */
Ofir Bittonac6fdbf2020-12-03 16:59:28 +02005324 if (parser->completion)
5325 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005326
5327 return rc;
5328}
5329
5330static int gaudi_patch_dma_packet(struct hl_device *hdev,
5331 struct hl_cs_parser *parser,
5332 struct packet_lin_dma *user_dma_pkt,
5333 struct packet_lin_dma *new_dma_pkt,
5334 u32 *new_dma_pkt_size)
5335{
5336 struct hl_userptr *userptr;
5337 struct scatterlist *sg, *sg_next_iter;
5338 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5339 u64 len, len_next;
5340 dma_addr_t dma_addr, dma_addr_next;
5341 u64 device_memory_addr, addr;
5342 enum dma_data_direction dir;
5343 struct sg_table *sgt;
5344 bool src_in_host = false;
5345 bool skip_host_mem_pin = false;
5346 bool user_memset;
5347
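	/*
	 * Expand the single user LIN_DMA packet into one packet per merged SG
	 * run of the pinned host memory: the engine barrier is kept only on
	 * the first generated packet and the user's write-completion setting
	 * is restored only on the last one.
	 */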
5348 ctl = le32_to_cpu(user_dma_pkt->ctl);
5349
5350 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5351 src_in_host = true;
5352
5353 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5354 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5355
5356 if (src_in_host) {
5357 addr = le64_to_cpu(user_dma_pkt->src_addr);
5358 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5359 dir = DMA_TO_DEVICE;
5360 if (user_memset)
5361 skip_host_mem_pin = true;
5362 } else {
5363 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5364 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5365 dir = DMA_FROM_DEVICE;
5366 }
5367
5368 if ((!skip_host_mem_pin) &&
5369 (!hl_userptr_is_pinned(hdev, addr,
5370 le32_to_cpu(user_dma_pkt->tsize),
5371 parser->job_userptr_list, &userptr))) {
5372 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5373 addr, le32_to_cpu(user_dma_pkt->tsize));
5374 return -EFAULT;
5375 }
5376
5377 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5378 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5379 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5380 return 0;
5381 }
5382
5383 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5384
5385 sgt = userptr->sgt;
5386 dma_desc_cnt = 0;
5387
5388 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5389 len = sg_dma_len(sg);
5390 dma_addr = sg_dma_address(sg);
5391
5392 if (len == 0)
5393 break;
5394
5395 while ((count + 1) < sgt->nents) {
5396 sg_next_iter = sg_next(sg);
5397 len_next = sg_dma_len(sg_next_iter);
5398 dma_addr_next = sg_dma_address(sg_next_iter);
5399
5400 if (len_next == 0)
5401 break;
5402
5403 if ((dma_addr + len == dma_addr_next) &&
5404 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5405 len += len_next;
5406 count++;
5407 sg = sg_next_iter;
5408 } else {
5409 break;
5410 }
5411 }
5412
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005413 ctl = le32_to_cpu(user_dma_pkt->ctl);
5414 if (likely(dma_desc_cnt))
5415 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5416 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5417 new_dma_pkt->ctl = cpu_to_le32(ctl);
5418 new_dma_pkt->tsize = cpu_to_le32(len);
5419
5420 if (dir == DMA_TO_DEVICE) {
5421 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5422 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5423 } else {
5424 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5425 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5426 }
5427
5428 if (!user_memset)
5429 device_memory_addr += len;
5430 dma_desc_cnt++;
5431 new_dma_pkt++;
5432 }
5433
5434 if (!dma_desc_cnt) {
5435 dev_err(hdev->dev,
5436 "Error of 0 SG entries when patching DMA packet\n");
5437 return -EFAULT;
5438 }
5439
5440 /* Fix the last dma packet - wrcomp must be as user set it */
5441 new_dma_pkt--;
5442 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5443
5444 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5445
5446 return 0;
5447}
5448
5449static int gaudi_patch_cb(struct hl_device *hdev,
5450 struct hl_cs_parser *parser)
5451{
5452 u32 cb_parsed_length = 0;
5453 u32 cb_patched_cur_length = 0;
5454 int rc = 0;
5455
5456 /* cb_user_size is more than 0 so the loop will always be executed */
5457 while (cb_parsed_length < parser->user_cb_size) {
5458 enum packet_id pkt_id;
5459 u16 pkt_size;
5460 u32 new_pkt_size = 0;
5461 struct gaudi_packet *user_pkt, *kernel_pkt;
5462
Arnd Bergmann82948e62020-10-26 17:08:06 +01005463 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5464 kernel_pkt = parser->patched_cb->kernel_address +
5465 cb_patched_cur_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005466
5467 pkt_id = (enum packet_id) (
5468 (le64_to_cpu(user_pkt->header) &
5469 PACKET_HEADER_PACKET_ID_MASK) >>
5470 PACKET_HEADER_PACKET_ID_SHIFT);
5471
Ofir Bittonbc75be22020-07-30 14:56:38 +03005472 if (!validate_packet_id(pkt_id)) {
5473 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5474 rc = -EINVAL;
5475 break;
5476 }
5477
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005478 pkt_size = gaudi_packet_sizes[pkt_id];
5479 cb_parsed_length += pkt_size;
5480 if (cb_parsed_length > parser->user_cb_size) {
5481 dev_err(hdev->dev,
5482 "packet 0x%x is out of CB boundary\n", pkt_id);
5483 rc = -EINVAL;
5484 break;
5485 }
5486
5487 switch (pkt_id) {
5488 case PACKET_LIN_DMA:
5489 rc = gaudi_patch_dma_packet(hdev, parser,
5490 (struct packet_lin_dma *) user_pkt,
5491 (struct packet_lin_dma *) kernel_pkt,
5492 &new_pkt_size);
5493 cb_patched_cur_length += new_pkt_size;
5494 break;
5495
5496 case PACKET_MSG_PROT:
5497 dev_err(hdev->dev,
5498 "User not allowed to use MSG_PROT\n");
5499 rc = -EPERM;
5500 break;
5501
5502 case PACKET_CP_DMA:
5503 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5504 rc = -EPERM;
5505 break;
5506
5507 case PACKET_STOP:
5508 dev_err(hdev->dev, "User not allowed to use STOP\n");
5509 rc = -EPERM;
5510 break;
5511
5512 case PACKET_WREG_32:
5513 case PACKET_WREG_BULK:
5514 case PACKET_MSG_LONG:
5515 case PACKET_MSG_SHORT:
5516 case PACKET_REPEAT:
5517 case PACKET_FENCE:
5518 case PACKET_NOP:
5519 case PACKET_ARB_POINT:
5520 case PACKET_LOAD_AND_EXE:
5521 memcpy(kernel_pkt, user_pkt, pkt_size);
5522 cb_patched_cur_length += pkt_size;
5523 break;
5524
5525 default:
5526 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5527 pkt_id);
5528 rc = -EINVAL;
5529 break;
5530 }
5531
5532 if (rc)
5533 break;
5534 }
5535
5536 return rc;
5537}
5538
5539static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5540 struct hl_cs_parser *parser)
5541{
5542 u64 patched_cb_handle;
5543 u32 patched_cb_size;
5544 struct hl_cb *user_cb;
5545 int rc;
5546
5547 /*
5548 * The new CB should have space at the end for two MSG_PROT packets:
5549 * 1. A packet that will act as a completion packet
5550 * 2. A packet that will generate an MSI interrupt
5551 */
Ofir Bittonac6fdbf2020-12-03 16:59:28 +02005552 if (parser->completion)
5553 parser->patched_cb_size = parser->user_cb_size +
5554 sizeof(struct packet_msg_prot) * 2;
5555 else
5556 parser->patched_cb_size = parser->user_cb_size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005557
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005558 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005559 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005560 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005561
5562 if (rc) {
5563 dev_err(hdev->dev,
5564 "Failed to allocate patched CB for DMA CS %d\n",
5565 rc);
5566 return rc;
5567 }
5568
5569 patched_cb_handle >>= PAGE_SHIFT;
5570 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5571 (u32) patched_cb_handle);
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005572 /* hl_cb_get should never fail */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005573 if (!parser->patched_cb) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005574 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5575 (u32) patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005576 rc = -EFAULT;
5577 goto out;
5578 }
5579
5580 /*
5581 * The check that parser->user_cb_size <= parser->user_cb->size was done
5582 * in validate_queue_index().
5583 */
Arnd Bergmann82948e62020-10-26 17:08:06 +01005584 memcpy(parser->patched_cb->kernel_address,
5585 parser->user_cb->kernel_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005586 parser->user_cb_size);
5587
5588 patched_cb_size = parser->patched_cb_size;
5589
5590 /* Validate patched CB instead of user CB */
5591 user_cb = parser->user_cb;
5592 parser->user_cb = parser->patched_cb;
5593 rc = gaudi_validate_cb(hdev, parser, true);
5594 parser->user_cb = user_cb;
5595
5596 if (rc) {
5597 hl_cb_put(parser->patched_cb);
5598 goto out;
5599 }
5600
5601 if (patched_cb_size != parser->patched_cb_size) {
5602 dev_err(hdev->dev, "user CB size mismatch\n");
5603 hl_cb_put(parser->patched_cb);
5604 rc = -EINVAL;
5605 goto out;
5606 }
5607
5608out:
5609 /*
5610 * Always call cb destroy here because we still hold one reference
5611 * to it from the earlier cb_get. After the job completes,
5612 * cb_put will release it, but here we want to remove it from the
5613 * idr
5614 */
5615 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5616 patched_cb_handle << PAGE_SHIFT);
5617
5618 return rc;
5619}
5620
5621static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5622 struct hl_cs_parser *parser)
5623{
5624 u64 patched_cb_handle;
5625 int rc;
5626
5627 rc = gaudi_validate_cb(hdev, parser, false);
5628
5629 if (rc)
5630 goto free_userptr;
5631
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005632 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005633 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005634 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005635 if (rc) {
5636 dev_err(hdev->dev,
5637 "Failed to allocate patched CB for DMA CS %d\n", rc);
5638 goto free_userptr;
5639 }
5640
5641 patched_cb_handle >>= PAGE_SHIFT;
5642 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5643 (u32) patched_cb_handle);
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005644 /* hl_cb_get should never fail here */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005645 if (!parser->patched_cb) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005646 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5647 (u32) patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005648 rc = -EFAULT;
5649 goto out;
5650 }
5651
5652 rc = gaudi_patch_cb(hdev, parser);
5653
5654 if (rc)
5655 hl_cb_put(parser->patched_cb);
5656
5657out:
5658 /*
5659 * Always call cb destroy here because we still hold one reference
5660 * to it from the earlier cb_get. After the job completes,
5661 * cb_put will release it, but here we want to remove it from the
5662 * idr
5663 */
5664 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5665 patched_cb_handle << PAGE_SHIFT);
5666
5667free_userptr:
5668 if (rc)
5669 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5670 return rc;
5671}
5672
5673static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5674 struct hl_cs_parser *parser)
5675{
5676 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
Oded Gabbay3c681572020-11-02 21:10:39 +02005677 struct gaudi_device *gaudi = hdev->asic_specific;
5678 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5679 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5680
5681 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5682 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5683 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5684 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5685 parser->hw_queue_id);
5686 return -EINVAL;
5687 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005688
5689 /* For internal queue jobs just check if CB address is valid */
5690 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5691 parser->user_cb_size,
5692 asic_prop->sram_user_base_address,
5693 asic_prop->sram_end_address))
5694 return 0;
5695
5696 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5697 parser->user_cb_size,
5698 asic_prop->dram_user_base_address,
5699 asic_prop->dram_end_address))
5700 return 0;
5701
5702 /* PMMU and HPMMU addresses are equal, check only one of them */
5703 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5704 parser->user_cb_size,
5705 asic_prop->pmmu.start_addr,
5706 asic_prop->pmmu.end_addr))
5707 return 0;
5708
5709 dev_err(hdev->dev,
5710 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5711 parser->user_cb, parser->user_cb_size);
5712
5713 return -EFAULT;
5714}
5715
5716static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5717{
5718 struct gaudi_device *gaudi = hdev->asic_specific;
5719
5720 if (parser->queue_type == QUEUE_TYPE_INT)
5721 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5722
5723 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5724 return gaudi_parse_cb_mmu(hdev, parser);
5725 else
5726 return gaudi_parse_cb_no_mmu(hdev, parser);
5727}
5728
5729static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
Arnd Bergmann82948e62020-10-26 17:08:06 +01005730 void *kernel_address, u32 len,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005731 u64 cq_addr, u32 cq_val, u32 msi_vec,
5732 bool eb)
5733{
5734 struct gaudi_device *gaudi = hdev->asic_specific;
5735 struct packet_msg_prot *cq_pkt;
5736 u32 tmp;
5737
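	/*
	 * Two MSG_PROT packets are appended at the tail of the CB: the first
	 * writes cq_val to the completion queue address (optionally with an
	 * engine barrier), the second writes to the PCIE_MSI_INTR_0 block to
	 * fire the MSI (vector 0 unless multi-MSI mode is enabled).
	 */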
Arnd Bergmann82948e62020-10-26 17:08:06 +01005738 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005739
Oded Gabbay65887292020-08-12 11:21:01 +03005740 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5741 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005742
5743 if (eb)
Oded Gabbay65887292020-08-12 11:21:01 +03005744 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005745
5746 cq_pkt->ctl = cpu_to_le32(tmp);
5747 cq_pkt->value = cpu_to_le32(cq_val);
5748 cq_pkt->addr = cpu_to_le64(cq_addr);
5749
5750 cq_pkt++;
5751
Oded Gabbay65887292020-08-12 11:21:01 +03005752 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5753 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005754 cq_pkt->ctl = cpu_to_le32(tmp);
5755 cq_pkt->value = cpu_to_le32(1);
5756
5757 if (!gaudi->multi_msi_mode)
5758 msi_vec = 0;
5759
5760 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5761}
5762
5763static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5764{
5765 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5766}
5767
5768static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5769 u32 size, u64 val)
5770{
5771 struct packet_lin_dma *lin_dma_pkt;
5772 struct hl_cs_job *job;
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005773 u32 cb_size, ctl, err_cause;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005774 struct hl_cb *cb;
Lv Yunlong115726c2021-04-26 06:43:46 -07005775 u64 id;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005776 int rc;
5777
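	/*
	 * Build a one-packet kernel CB: a LIN_DMA with the MEMSET bit set, so
	 * src_addr carries the fill value, and submit it through QMAN0 on DMA
	 * channel 0. DMA0_CORE_ERR_CAUSE is checked around the transfer to
	 * make sure the engine is clean.
	 */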
Ofir Bittona04b7cd2020-07-13 13:36:55 +03005778 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005779 if (!cb)
5780 return -EFAULT;
5781
Arnd Bergmann82948e62020-10-26 17:08:06 +01005782 lin_dma_pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005783 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5784 cb_size = sizeof(*lin_dma_pkt);
5785
Oded Gabbay65887292020-08-12 11:21:01 +03005786 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5787 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5788 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5789 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5790 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5791
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005792 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5793 lin_dma_pkt->src_addr = cpu_to_le64(val);
5794 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5795 lin_dma_pkt->tsize = cpu_to_le32(size);
5796
5797 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5798 if (!job) {
5799 dev_err(hdev->dev, "Failed to allocate a new job\n");
5800 rc = -ENOMEM;
5801 goto release_cb;
5802 }
5803
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005804 /* Verify DMA is OK */
5805 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5806 if (err_cause && !hdev->init_done) {
5807 dev_dbg(hdev->dev,
5808 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5809 err_cause);
5810 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5811 }
5812
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005813 job->id = 0;
5814 job->user_cb = cb;
Tomer Tayarf07486742020-08-02 22:51:31 +03005815 atomic_inc(&job->user_cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005816 job->user_cb_size = cb_size;
5817 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5818 job->patched_cb = job->user_cb;
5819 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5820
5821 hl_debugfs_add_job(hdev, job);
5822
5823 rc = gaudi_send_job_on_qman0(hdev, job);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005824 hl_debugfs_remove_job(hdev, job);
5825 kfree(job);
Tomer Tayarf07486742020-08-02 22:51:31 +03005826 atomic_dec(&cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005827
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005828 /* Verify DMA is OK */
5829 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5830 if (err_cause) {
5831 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5832 rc = -EIO;
5833 if (!hdev->init_done) {
5834 dev_dbg(hdev->dev,
5835 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5836 err_cause);
5837 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5838 }
5839 }
5840
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005841release_cb:
Lv Yunlong115726c2021-04-26 06:43:46 -07005842 id = cb->id;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005843 hl_cb_put(cb);
Lv Yunlong115726c2021-04-26 06:43:46 -07005844 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005845
5846 return rc;
5847}
5848
Ofir Bitton423815b2021-01-05 09:04:07 +02005849static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5850 u32 num_regs, u32 val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005851{
Ofir Bitton423815b2021-01-05 09:04:07 +02005852 struct packet_msg_long *pkt;
5853 struct hl_cs_job *job;
5854 u32 cb_size, ctl;
5855 struct hl_cb *cb;
5856 int i, rc;
5857
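	/*
	 * One MSG_LONG packet is built per register, each writing val to
	 * reg_base + i * 4, plus room for a trailing MSG_PROT completion
	 * packet; the CB is then submitted synchronously through QMAN0.
	 */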
5858 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5859
5860 if (cb_size > SZ_2M) {
5861 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
5862 return -ENOMEM;
5863 }
5864
5865 cb = hl_cb_kernel_create(hdev, cb_size, false);
5866 if (!cb)
5867 return -EFAULT;
5868
5869 pkt = cb->kernel_address;
5870
5871 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5872 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5873 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5874 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5875 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5876
5877 for (i = 0; i < num_regs ; i++, pkt++) {
5878 pkt->ctl = cpu_to_le32(ctl);
5879 pkt->value = cpu_to_le32(val);
5880 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5881 }
5882
5883 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5884 if (!job) {
5885 dev_err(hdev->dev, "Failed to allocate a new job\n");
5886 rc = -ENOMEM;
5887 goto release_cb;
5888 }
5889
5890 job->id = 0;
5891 job->user_cb = cb;
5892 atomic_inc(&job->user_cb->cs_cnt);
5893 job->user_cb_size = cb_size;
5894 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5895 job->patched_cb = job->user_cb;
5896 job->job_cb_size = cb_size;
5897
5898 hl_debugfs_add_job(hdev, job);
5899
5900 rc = gaudi_send_job_on_qman0(hdev, job);
5901 hl_debugfs_remove_job(hdev, job);
5902 kfree(job);
5903 atomic_dec(&cb->cs_cnt);
5904
5905release_cb:
5906 hl_cb_put(cb);
5907 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5908
5909 return rc;
5910}
5911
5912static int gaudi_schedule_register_memset(struct hl_device *hdev,
5913 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5914{
farah kassabrie65448f2021-03-30 16:38:02 +03005915 struct hl_ctx *ctx;
Ofir Bitton423815b2021-01-05 09:04:07 +02005916 struct hl_pending_cb *pending_cb;
5917 struct packet_msg_long *pkt;
5918 u32 cb_size, ctl;
5919 struct hl_cb *cb;
farah kassabrie65448f2021-03-30 16:38:02 +03005920 int i, rc;
5921
5922 mutex_lock(&hdev->fpriv_list_lock);
5923 ctx = hdev->compute_ctx;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005924
Ofir Bitton423815b2021-01-05 09:04:07 +02005925	/* If no compute context is available, or the context is going down,
5926	 * memset the registers directly
5927	 */
farah kassabrie65448f2021-03-30 16:38:02 +03005928 if (!ctx || kref_read(&ctx->refcount) == 0) {
5929 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5930 mutex_unlock(&hdev->fpriv_list_lock);
5931 return rc;
5932 }
5933
5934 mutex_unlock(&hdev->fpriv_list_lock);
Ofir Bitton423815b2021-01-05 09:04:07 +02005935
5936 cb_size = (sizeof(*pkt) * num_regs) +
5937 sizeof(struct packet_msg_prot) * 2;
5938
5939 if (cb_size > SZ_2M) {
5940		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
5941 return -ENOMEM;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005942 }
5943
Ofir Bitton423815b2021-01-05 09:04:07 +02005944 pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5945 if (!pending_cb)
5946 return -ENOMEM;
5947
5948 cb = hl_cb_kernel_create(hdev, cb_size, false);
5949 if (!cb) {
5950 kfree(pending_cb);
5951 return -EFAULT;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005952 }
5953
Ofir Bitton423815b2021-01-05 09:04:07 +02005954 pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005955
Ofir Bitton423815b2021-01-05 09:04:07 +02005956 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5957 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5958 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5959 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5960 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005961
Ofir Bitton423815b2021-01-05 09:04:07 +02005962 for (i = 0; i < num_regs ; i++, pkt++) {
5963 pkt->ctl = cpu_to_le32(ctl);
5964 pkt->value = cpu_to_le32(val);
5965 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5966 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005967
Ofir Bitton423815b2021-01-05 09:04:07 +02005968 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5969
5970 pending_cb->cb = cb;
5971 pending_cb->cb_size = cb_size;
5972 /* The queue ID MUST be an external queue ID. Otherwise, we will
5973 * have undefined behavior
5974 */
5975 pending_cb->hw_queue_id = hw_queue_id;
5976
5977 spin_lock(&ctx->pending_cb_lock);
5978 list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5979 spin_unlock(&ctx->pending_cb_lock);
5980
5981 return 0;
5982}
5983
5984static int gaudi_restore_sm_registers(struct hl_device *hdev)
5985{
5986 u64 base_addr;
5987 u32 num_regs;
5988 int rc;
5989
5990 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5991 num_regs = NUM_OF_SOB_IN_BLOCK;
5992 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5993 if (rc) {
5994 dev_err(hdev->dev, "failed resetting SM registers");
5995 return -ENOMEM;
5996 }
5997
5998 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5999 num_regs = NUM_OF_SOB_IN_BLOCK;
6000 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6001 if (rc) {
6002 dev_err(hdev->dev, "failed resetting SM registers");
6003 return -ENOMEM;
6004 }
6005
6006 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6007 num_regs = NUM_OF_SOB_IN_BLOCK;
6008 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6009 if (rc) {
6010 dev_err(hdev->dev, "failed resetting SM registers");
6011 return -ENOMEM;
6012 }
6013
6014 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6015 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6016 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6017 if (rc) {
6018 dev_err(hdev->dev, "failed resetting SM registers");
6019 return -ENOMEM;
6020 }
6021
6022 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6023 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6024 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6025 if (rc) {
6026 dev_err(hdev->dev, "failed resetting SM registers");
6027 return -ENOMEM;
6028 }
6029
6030 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6031 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6032 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6033 if (rc) {
6034 dev_err(hdev->dev, "failed resetting SM registers");
6035 return -ENOMEM;
6036 }
6037
6038 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6039 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6040 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6041 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6042 if (rc) {
6043 dev_err(hdev->dev, "failed resetting SM registers");
6044 return -ENOMEM;
6045 }
6046
6047 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6048 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6049 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6050 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6051 if (rc) {
6052 dev_err(hdev->dev, "failed resetting SM registers");
6053 return -ENOMEM;
6054 }
6055
6056 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006057}
6058
6059static void gaudi_restore_dma_registers(struct hl_device *hdev)
6060{
6061 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6062 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6063 int i;
6064
6065 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6066 u64 sob_addr = CFG_BASE +
6067 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6068 (i * sob_delta);
6069 u32 dma_offset = i * DMA_CORE_OFFSET;
6070
6071 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6072 lower_32_bits(sob_addr));
6073 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6074 upper_32_bits(sob_addr));
6075 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6076
6077 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6078 * modified by the user for SRAM reduction
6079 */
6080 if (i > 1)
6081 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6082 0x00000001);
6083 }
6084}
6085
6086static void gaudi_restore_qm_registers(struct hl_device *hdev)
6087{
6088 u32 qman_offset;
6089 int i;
6090
6091 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6092 qman_offset = i * DMA_QMAN_OFFSET;
6093 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6094 }
6095
6096 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6097 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6098 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6099 }
6100
6101 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6102 qman_offset = i * TPC_QMAN_OFFSET;
6103 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6104 }
Oded Gabbay3c681572020-11-02 21:10:39 +02006105
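	/* Illustrative reading of the offset math below, assuming each NIC
	 * macro hosts two engine QMANs: (i >> 1) selects the NIC macro and
	 * (i & 0x1) selects the engine QMAN inside it.
	 */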
6106 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6107 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6108 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6109 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6110 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006111}
6112
Ofir Bitton423815b2021-01-05 09:04:07 +02006113static int gaudi_restore_user_registers(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006114{
Ofir Bitton423815b2021-01-05 09:04:07 +02006115 int rc;
6116
6117 rc = gaudi_restore_sm_registers(hdev);
6118 if (rc)
6119 return rc;
6120
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006121 gaudi_restore_dma_registers(hdev);
6122 gaudi_restore_qm_registers(hdev);
Ofir Bitton423815b2021-01-05 09:04:07 +02006123
6124 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006125}
6126
6127static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6128{
Ofir Bitton423815b2021-01-05 09:04:07 +02006129 return gaudi_restore_user_registers(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006130}
6131
6132static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6133{
6134 struct asic_fixed_properties *prop = &hdev->asic_prop;
6135 struct gaudi_device *gaudi = hdev->asic_specific;
6136 u64 addr = prop->mmu_pgt_addr;
6137 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6138
6139 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6140 return 0;
6141
6142 return gaudi_memset_device_memory(hdev, addr, size, 0);
6143}
6144
6145static void gaudi_restore_phase_topology(struct hl_device *hdev)
6146{
6147
6148}
6149
Ofir Bittona5778d12021-02-24 11:51:40 +02006150static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6151 bool user_address, u32 *val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006152{
6153 struct asic_fixed_properties *prop = &hdev->asic_prop;
6154 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittona5778d12021-02-24 11:51:40 +02006155 u64 hbm_bar_addr, host_phys_end;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006156 int rc = 0;
6157
Ofir Bittona5778d12021-02-24 11:51:40 +02006158 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6159
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006160 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006161
6162 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6163 (hdev->clock_gating_mask &
6164 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6165
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006166 dev_err_ratelimited(hdev->dev,
6167 "Can't read register - clock gating is enabled!\n");
6168 rc = -EFAULT;
6169 } else {
6170 *val = RREG32(addr - CFG_BASE);
6171 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006172
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006173 } else if ((addr >= SRAM_BASE_ADDR) &&
6174 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6175 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6176 (addr - SRAM_BASE_ADDR));
6177 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6178 u64 bar_base_addr = DRAM_PHYS_BASE +
6179 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6180
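		/* Sketch of the flow below: point the HBM BAR at the
		 * BAR-size-aligned window that contains addr, read through
		 * the BAR, then restore the previous window. E.g.
		 * (illustrative) with a 4GB BAR the window is the 4GB-aligned
		 * region around addr.
		 */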
6181 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6182 if (hbm_bar_addr != U64_MAX) {
6183 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6184 (addr - bar_base_addr));
6185
6186 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6187 hbm_bar_addr);
6188 }
6189 if (hbm_bar_addr == U64_MAX)
6190 rc = -EIO;
Ofir Bittona5778d12021-02-24 11:51:40 +02006191 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6192 user_address && !iommu_present(&pci_bus_type)) {
6193 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006194 } else {
6195 rc = -EFAULT;
6196 }
6197
6198 return rc;
6199}
6200
Ofir Bittona5778d12021-02-24 11:51:40 +02006201static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6202 bool user_address, u32 val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006203{
6204 struct asic_fixed_properties *prop = &hdev->asic_prop;
6205 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittona5778d12021-02-24 11:51:40 +02006206 u64 hbm_bar_addr, host_phys_end;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006207 int rc = 0;
6208
Ofir Bittona5778d12021-02-24 11:51:40 +02006209 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6210
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006211 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006212
6213 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6214 (hdev->clock_gating_mask &
6215 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6216
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006217 dev_err_ratelimited(hdev->dev,
6218 "Can't write register - clock gating is enabled!\n");
6219 rc = -EFAULT;
6220 } else {
6221 WREG32(addr - CFG_BASE, val);
6222 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006223
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006224 } else if ((addr >= SRAM_BASE_ADDR) &&
6225 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6226 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6227 (addr - SRAM_BASE_ADDR));
6228 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6229 u64 bar_base_addr = DRAM_PHYS_BASE +
6230 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6231
6232 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6233 if (hbm_bar_addr != U64_MAX) {
6234 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6235 (addr - bar_base_addr));
6236
6237 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6238 hbm_bar_addr);
6239 }
6240 if (hbm_bar_addr == U64_MAX)
6241 rc = -EIO;
Ofir Bittona5778d12021-02-24 11:51:40 +02006242 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6243 user_address && !iommu_present(&pci_bus_type)) {
6244 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006245 } else {
6246 rc = -EFAULT;
6247 }
6248
6249 return rc;
6250}
6251
Ofir Bittona5778d12021-02-24 11:51:40 +02006252static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6253 bool user_address, u64 *val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006254{
6255 struct asic_fixed_properties *prop = &hdev->asic_prop;
6256 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittona5778d12021-02-24 11:51:40 +02006257 u64 hbm_bar_addr, host_phys_end;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006258 int rc = 0;
6259
Ofir Bittona5778d12021-02-24 11:51:40 +02006260 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6261
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006262 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006263
6264 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6265 (hdev->clock_gating_mask &
6266 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6267
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006268 dev_err_ratelimited(hdev->dev,
6269 "Can't read register - clock gating is enabled!\n");
6270 rc = -EFAULT;
6271 } else {
6272 u32 val_l = RREG32(addr - CFG_BASE);
6273 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6274
6275 *val = (((u64) val_h) << 32) | val_l;
6276 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006277
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006278 } else if ((addr >= SRAM_BASE_ADDR) &&
6279 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6280 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6281 (addr - SRAM_BASE_ADDR));
6282 } else if (addr <=
6283 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6284 u64 bar_base_addr = DRAM_PHYS_BASE +
6285 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6286
6287 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6288 if (hbm_bar_addr != U64_MAX) {
6289 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6290 (addr - bar_base_addr));
6291
6292 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6293 hbm_bar_addr);
6294 }
6295 if (hbm_bar_addr == U64_MAX)
6296 rc = -EIO;
Ofir Bittona5778d12021-02-24 11:51:40 +02006297 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6298 user_address && !iommu_present(&pci_bus_type)) {
6299 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006300 } else {
6301 rc = -EFAULT;
6302 }
6303
6304 return rc;
6305}
6306
Ofir Bittona5778d12021-02-24 11:51:40 +02006307static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6308 bool user_address, u64 val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006309{
6310 struct asic_fixed_properties *prop = &hdev->asic_prop;
6311 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittona5778d12021-02-24 11:51:40 +02006312 u64 hbm_bar_addr, host_phys_end;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006313 int rc = 0;
6314
Ofir Bittona5778d12021-02-24 11:51:40 +02006315 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6316
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006317 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006318
6319 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6320 (hdev->clock_gating_mask &
6321 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6322
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006323 dev_err_ratelimited(hdev->dev,
6324 "Can't write register - clock gating is enabled!\n");
6325 rc = -EFAULT;
6326 } else {
6327 WREG32(addr - CFG_BASE, lower_32_bits(val));
6328 WREG32(addr + sizeof(u32) - CFG_BASE,
6329 upper_32_bits(val));
6330 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006331
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006332 } else if ((addr >= SRAM_BASE_ADDR) &&
6333 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6334 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6335 (addr - SRAM_BASE_ADDR));
6336 } else if (addr <=
6337 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6338 u64 bar_base_addr = DRAM_PHYS_BASE +
6339 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6340
6341 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6342 if (hbm_bar_addr != U64_MAX) {
6343 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6344 (addr - bar_base_addr));
6345
6346 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6347 hbm_bar_addr);
6348 }
6349 if (hbm_bar_addr == U64_MAX)
6350 rc = -EIO;
Ofir Bittona5778d12021-02-24 11:51:40 +02006351 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6352 user_address && !iommu_present(&pci_bus_type)) {
6353 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006354 } else {
6355 rc = -EFAULT;
6356 }
6357
6358 return rc;
6359}
6360
Oded Gabbay639781d2021-04-02 01:43:18 +03006361static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6362 u32 size_to_dma, dma_addr_t dma_addr)
6363{
6364 u32 err_cause, val;
6365 u64 dma_offset;
6366 int rc;
6367
6368 dma_offset = dma_id * DMA_CORE_OFFSET;
6369
6370 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6371 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6372 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6373 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6374 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6375 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6376 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6377
6378 rc = hl_poll_timeout(
6379 hdev,
6380 mmDMA0_CORE_STS0 + dma_offset,
6381 val,
6382 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6383 0,
6384 1000000);
6385
6386 if (rc) {
6387 dev_err(hdev->dev,
6388 "DMA %d timed-out during reading of 0x%llx\n",
6389 dma_id, addr);
6390 return -EIO;
6391 }
6392
6393 /* Verify DMA is OK */
6394 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6395 if (err_cause) {
6396 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6397 dev_dbg(hdev->dev,
6398 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6399 err_cause);
6400 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6401
6402 return -EIO;
6403 }
6404
6405 return 0;
6406}
6407
6408static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6409 void *blob_addr)
6410{
6411 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6412 struct gaudi_device *gaudi = hdev->asic_specific;
6413 u64 dma_offset, qm_offset;
6414 dma_addr_t dma_addr;
6415 void *kernel_addr;
6416 bool is_eng_idle;
Colin Ian Kingb4e964f2021-04-12 17:10:12 +01006417 int rc = 0, dma_id;
Oded Gabbay639781d2021-04-02 01:43:18 +03006418
6419 kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6420 hdev, SZ_2M,
6421 &dma_addr,
6422 GFP_KERNEL | __GFP_ZERO);
6423
6424 if (!kernel_addr)
6425 return -ENOMEM;
6426
6427 mutex_lock(&gaudi->clk_gate_mutex);
6428
6429 hdev->asic_funcs->disable_clock_gating(hdev);
6430
6431 hdev->asic_funcs->hw_queues_lock(hdev);
6432
6433 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6434 dma_offset = dma_id * DMA_CORE_OFFSET;
6435 qm_offset = dma_id * DMA_QMAN_OFFSET;
6436 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6437 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6438
6439 if (!is_eng_idle) {
6440 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6441 dma_offset = dma_id * DMA_CORE_OFFSET;
6442 qm_offset = dma_id * DMA_QMAN_OFFSET;
6443 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6444 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6445
6446 if (!is_eng_idle) {
6447 dev_err_ratelimited(hdev->dev,
6448 "Can't read via DMA because it is BUSY\n");
6449 rc = -EAGAIN;
6450 goto out;
6451 }
6452 }
6453
6454 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6455 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6456 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6457
6458 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6459	 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6460 * ASID
6461 */
6462 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6463
6464 /* Verify DMA is OK */
6465 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6466 if (err_cause) {
6467 dev_dbg(hdev->dev,
6468 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6469 err_cause);
6470 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6471 }
6472
6473 pos = 0;
6474 size_left = size;
6475 size_to_dma = SZ_2M;
6476
6477 while (size_left > 0) {
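	/* The loop below copies in SZ_2M chunks through the bounce buffer;
	 * e.g. (illustrative) a 5MB request is served as 2MB + 2MB + 1MB
	 * transfers, each followed by a memcpy into blob_addr.
	 */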
6478
6479 if (size_left < SZ_2M)
6480 size_to_dma = size_left;
6481
6482 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6483 dma_addr);
6484 if (rc)
6485 break;
6486
6487 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6488
6489 if (size_left <= SZ_2M)
6490 break;
6491
6492 pos += SZ_2M;
6493 addr += SZ_2M;
6494 size_left -= SZ_2M;
6495 }
6496
6497 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6498	 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6499 * ASID
6500 */
6501 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6502 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6503
6504 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6505
6506out:
6507 hdev->asic_funcs->hw_queues_unlock(hdev);
6508
6509 hdev->asic_funcs->set_clock_gating(hdev);
6510
6511 mutex_unlock(&gaudi->clk_gate_mutex);
6512
6513 hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6514 dma_addr);
6515
6516 return rc;
6517}
6518
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006519static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6520{
6521 struct gaudi_device *gaudi = hdev->asic_specific;
6522
6523 if (hdev->hard_reset_pending)
6524 return U64_MAX;
6525
6526 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6527 (addr - gaudi->hbm_bar_cur_addr));
6528}
6529
6530static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6531{
6532 struct gaudi_device *gaudi = hdev->asic_specific;
6533
6534 if (hdev->hard_reset_pending)
6535 return;
6536
6537 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6538 (addr - gaudi->hbm_bar_cur_addr));
6539}
6540
Ofir Bitton1137e1e2020-09-30 18:43:52 +03006541void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006542{
6543 /* mask to zero the MMBP and ASID bits */
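	/* e.g. (illustrative, assuming MMBP sits inside the 0x7FF mask as
	 * noted above) preparing a register for asid 3 clears bits [10:0]
	 * and then ORs in 0x3, so the engine issues transactions with ASID 3
	 * and MMU bypass left disabled.
	 */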
6544 WREG32_AND(reg, ~0x7FF);
6545 WREG32_OR(reg, asid);
6546}
6547
6548static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6549{
6550 struct gaudi_device *gaudi = hdev->asic_specific;
6551
6552 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6553 return;
6554
6555 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02006556 dev_crit(hdev->dev, "asid %u is too big\n", asid);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006557 return;
6558 }
6559
6560 mutex_lock(&gaudi->clk_gate_mutex);
6561
6562 hdev->asic_funcs->disable_clock_gating(hdev);
6563
6564 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6565 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6566 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6567 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6568 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569
6570 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6571 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6572 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6573 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6574 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6575
6576 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6577 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6578 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6579 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6580 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6581
6582 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6583 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6584 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6585 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6586 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6587
6588 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6589 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6590 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6591 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6592 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6593
6594 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6595 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6596 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6597 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6598 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6599
6600 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6601 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6602 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6603 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6604 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6605
6606 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6607 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6608 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6609 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6610 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6611
6612 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6613 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6614 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6615 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6616 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6617 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6618 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6619 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6620
6621 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6622 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6623 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6624 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6625 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6626 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6627 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6628
6629 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6630 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6631 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6632 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6633 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6634 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6635 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6636
6637 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6638 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6639 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6640 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6641 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6642 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6643 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6644
6645 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6646 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6647 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6648 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6649 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6650 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6651 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6652
6653 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6654 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6655 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6656 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6657 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6658 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6659 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6660
6661 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6662 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6663 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6664 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6665 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6666 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6667 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6668
6669 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6670 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6671 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6672 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6673 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6674 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6675 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6676
6677 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6678 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6679 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6680 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6681 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6682 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6683 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6684
6685 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6686 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6687 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6688 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6689 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6690 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6691 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6692 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6693 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6694 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6695
6696 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6697 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6698 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6699 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6700 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6701 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6702 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6703 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6704 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6705 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6706 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6707 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6708
Oded Gabbay90810212021-05-25 21:35:13 +03006709 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006710 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6711 asid);
6712 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6713 asid);
6714 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6715 asid);
6716 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6717 asid);
6718 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6719 asid);
6720 }
6721
Oded Gabbay90810212021-05-25 21:35:13 +03006722 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006723 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6724 asid);
6725 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6726 asid);
6727 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6728 asid);
6729 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6730 asid);
6731 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6732 asid);
6733 }
6734
Oded Gabbay90810212021-05-25 21:35:13 +03006735 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006736 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6737 asid);
6738 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6739 asid);
6740 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6741 asid);
6742 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6743 asid);
6744 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6745 asid);
6746 }
6747
Oded Gabbay90810212021-05-25 21:35:13 +03006748 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006749 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6750 asid);
6751 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6752 asid);
6753 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6754 asid);
6755 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6756 asid);
6757 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6758 asid);
6759 }
6760
Oded Gabbay90810212021-05-25 21:35:13 +03006761 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006762 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6763 asid);
6764 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6765 asid);
6766 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6767 asid);
6768 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6769 asid);
6770 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6771 asid);
6772 }
6773
Oded Gabbay90810212021-05-25 21:35:13 +03006774 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006775 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6776 asid);
6777 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6778 asid);
6779 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6780 asid);
6781 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6782 asid);
6783 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6784 asid);
6785 }
6786
Oded Gabbay90810212021-05-25 21:35:13 +03006787 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006788 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6789 asid);
6790 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6791 asid);
6792 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6793 asid);
6794 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6795 asid);
6796 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6797 asid);
6798 }
6799
Oded Gabbay90810212021-05-25 21:35:13 +03006800 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006801 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6802 asid);
6803 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6804 asid);
6805 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6806 asid);
6807 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6808 asid);
6809 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6810 asid);
6811 }
6812
Oded Gabbay90810212021-05-25 21:35:13 +03006813 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006814 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6815 asid);
6816 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6817 asid);
6818 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6819 asid);
6820 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6821 asid);
6822 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6823 asid);
6824 }
6825
Oded Gabbay90810212021-05-25 21:35:13 +03006826 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006827 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6828 asid);
6829 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6830 asid);
6831 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6832 asid);
6833 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6834 asid);
6835 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6836 asid);
6837 }
6838
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006839 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006840
6841 mutex_unlock(&gaudi->clk_gate_mutex);
6842}
6843
6844static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6845 struct hl_cs_job *job)
6846{
6847 struct packet_msg_prot *fence_pkt;
6848 u32 *fence_ptr;
6849 dma_addr_t fence_dma_addr;
6850 struct hl_cb *cb;
6851 u32 tmp, timeout, dma_offset;
6852 int rc;
6853
6854 if (hdev->pldm)
6855 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6856 else
6857 timeout = HL_DEVICE_TIMEOUT_USEC;
6858
Ohad Sharabicf303392021-01-17 16:01:56 +02006859 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006860 dev_err_ratelimited(hdev->dev,
6861 "Can't send driver job on QMAN0 because the device is not idle\n");
6862 return -EBUSY;
6863 }
6864
6865 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6866 &fence_dma_addr);
6867 if (!fence_ptr) {
6868 dev_err(hdev->dev,
6869 "Failed to allocate fence memory for QMAN0\n");
6870 return -ENOMEM;
6871 }
6872
6873 cb = job->patched_cb;
6874
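	/* The patched CB ends with a MSG_PROT packet that writes
	 * GAUDI_QMAN0_FENCE_VAL to fence_dma_addr; the poll further down
	 * waits on fence_ptr to learn that QMAN0 has consumed the job.
	 */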
Arnd Bergmann82948e62020-10-26 17:08:06 +01006875 fence_pkt = cb->kernel_address +
6876 job->job_cb_size - sizeof(struct packet_msg_prot);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006877
Oded Gabbay65887292020-08-12 11:21:01 +03006878 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6879 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6880 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6881
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006882 fence_pkt->ctl = cpu_to_le32(tmp);
6883 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6884 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6885
6886 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6887
6888 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6889
6890 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6891 job->job_cb_size, cb->bus_address);
6892 if (rc) {
6893 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6894 goto free_fence_ptr;
6895 }
6896
6897 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6898 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6899 timeout, true);
6900
6901 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6902
6903 if (rc == -ETIMEDOUT) {
6904 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6905 goto free_fence_ptr;
6906 }
6907
6908free_fence_ptr:
6909 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6910 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6911
6912 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6913 fence_dma_addr);
6914 return rc;
6915}
6916
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006917static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6918{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006919 if (event_type >= GAUDI_EVENT_SIZE)
6920 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006921
Ofir Bittonebd8d122020-05-10 13:41:28 +03006922 if (!gaudi_irq_map_table[event_type].valid)
6923 goto event_not_supported;
6924
6925	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6926
6927 return;
6928
6929event_not_supported:
6930 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006931}
6932
6933static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6934 u32 x_y, bool is_write)
6935{
6936 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6937
6938 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6939 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6940
6941 switch (x_y) {
6942 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6943 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6944 dma_id[0] = 0;
6945 dma_id[1] = 2;
6946 break;
6947 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6948 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6949 dma_id[0] = 1;
6950 dma_id[1] = 3;
6951 break;
6952 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6953 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6954 dma_id[0] = 4;
6955 dma_id[1] = 6;
6956 break;
6957 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6958 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6959 dma_id[0] = 5;
6960 dma_id[1] = 7;
6961 break;
6962 default:
6963 goto unknown_initiator;
6964 }
6965
6966 for (i = 0 ; i < 2 ; i++) {
6967 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6968 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6969 }
6970
6971 switch (x_y) {
6972 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6973 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6974 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6975 return "DMA0";
6976 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6977 return "DMA2";
6978 else
6979 return "DMA0 or DMA2";
6980 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6981 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6982 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6983 return "DMA1";
6984 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6985 return "DMA3";
6986 else
6987 return "DMA1 or DMA3";
6988 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6989 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6990 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6991 return "DMA4";
6992 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6993 return "DMA6";
6994 else
6995 return "DMA4 or DMA6";
6996 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6997 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6998 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6999 return "DMA5";
7000 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7001 return "DMA7";
7002 else
7003 return "DMA5 or DMA7";
7004 }
7005
7006unknown_initiator:
7007 return "unknown initiator";
7008}
7009
7010static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7011 bool is_write)
7012{
7013 u32 val, x_y, axi_id;
7014
7015 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7016 RREG32(mmMMU_UP_RAZWI_READ_ID);
7017 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7018 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7019 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7020 RAZWI_INITIATOR_AXI_ID_SHIFT);
7021
7022 switch (x_y) {
7023 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7024 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7025 return "TPC0";
7026 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7027 return "NIC0";
7028 break;
7029 case RAZWI_INITIATOR_ID_X_Y_TPC1:
7030 return "TPC1";
7031 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7032 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7033 return "MME0";
7034 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7035 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7036 return "MME1";
7037 case RAZWI_INITIATOR_ID_X_Y_TPC2:
7038 return "TPC2";
7039 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7040 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7041 return "TPC3";
7042 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7043 return "PCI";
7044 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7045 return "CPU";
7046 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7047 return "PSOC";
7048 break;
7049 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7050 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7051 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7052 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7053 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7054 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7055 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7056 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7057 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7058 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7059 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7060 return "TPC4";
7061 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7062 return "NIC1";
7063 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7064 return "NIC2";
7065 break;
7066 case RAZWI_INITIATOR_ID_X_Y_TPC5:
7067 return "TPC5";
7068 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7069 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7070 return "MME2";
7071 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7072 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7073 return "MME3";
7074 case RAZWI_INITIATOR_ID_X_Y_TPC6:
7075 return "TPC6";
7076 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7077 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7078 return "TPC7";
7079 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7080 return "NIC4";
7081 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7082 return "NIC5";
7083 break;
7084 default:
7085 break;
7086 }
7087
7088 dev_err(hdev->dev,
7089 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7090 val,
7091 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7092 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7093 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7094 RAZWI_INITIATOR_AXI_ID_MASK);
7095
7096 return "unknown initiator";
7097}
7098
7099static void gaudi_print_razwi_info(struct hl_device *hdev)
7100{
7101 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7102 dev_err_ratelimited(hdev->dev,
7103 "RAZWI event caused by illegal write of %s\n",
7104 gaudi_get_razwi_initiator_name(hdev, true));
7105 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7106 }
7107
7108 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7109 dev_err_ratelimited(hdev->dev,
7110 "RAZWI event caused by illegal read of %s\n",
7111 gaudi_get_razwi_initiator_name(hdev, false));
7112 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7113 }
7114}
7115
7116static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7117{
7118 struct gaudi_device *gaudi = hdev->asic_specific;
7119 u64 addr;
7120 u32 val;
7121
7122 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7123 return;
7124
7125 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7126 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7127 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7128 addr <<= 32;
7129 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7130
7131 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7132 addr);
7133
7134 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7135 }
7136
7137 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7138 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7139 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7140 addr <<= 32;
7141 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7142
7143 dev_err_ratelimited(hdev->dev,
7144 "MMU access error on va 0x%llx\n", addr);
7145
7146 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7147 }
7148}
7149
7150/*
7151 * +-------------------+------------------------------------------------------+
7152 * | Configuration Reg | Description |
7153 * | Address | |
7154 * +-------------------+------------------------------------------------------+
7155 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
7156 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
7157 * | |0xF34 memory wrappers 63:32 |
7158 * | |0xF38 memory wrappers 95:64 |
7159 * | |0xF3C memory wrappers 127:96 |
7160 * +-------------------+------------------------------------------------------+
7161 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
7162 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
7163 * | |0xF44 memory wrappers 63:32 |
7164 * | |0xF48 memory wrappers 95:64 |
7165 * | |0xF4C memory wrappers 127:96 |
7166 * +-------------------+------------------------------------------------------+
7167 */
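/*
 * Worked example, consistent with the extraction loop below: if the second
 * status word (offset 0xF34, wrappers 63:32) has bit 5 set, the failing
 * wrapper index is 32 + 5 = 37; that index is then written to the memory
 * select register so the address and syndrome registers report that wrapper.
 */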
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007168static int gaudi_extract_ecc_info(struct hl_device *hdev,
7169 struct ecc_info_extract_params *params, u64 *ecc_address,
7170 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007171{
7172 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007173 u32 i, num_mem_regs, reg, err_bit;
7174 u64 err_addr, err_word = 0;
7175 int rc = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007176
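	/* One 32-bit status word covers 32 memory wrappers, so round up:
	 * e.g. 33 memories need two words, 128 memories need four.
	 */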
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007177 num_mem_regs = params->num_memories / 32 +
7178 ((params->num_memories % 32) ? 1 : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007179
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007180 if (params->block_address >= CFG_BASE)
7181 params->block_address -= CFG_BASE;
7182
7183 if (params->derr)
7184 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007185 else
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007186 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007187
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007188 if (params->disable_clock_gating) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007189 mutex_lock(&gaudi->clk_gate_mutex);
7190 hdev->asic_funcs->disable_clock_gating(hdev);
7191 }
7192
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007193 /* Set invalid wrapper index */
7194 *memory_wrapper_idx = 0xFF;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007195
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007196 /* Iterate through memory wrappers, a single bit must be set */
Dan Carpenterb0353542020-08-05 12:51:05 +03007197 for (i = 0 ; i < num_mem_regs ; i++) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007198		err_word = RREG32(err_addr + (i * 4));
7200 if (err_word) {
7201 err_bit = __ffs(err_word);
7202 *memory_wrapper_idx = err_bit + (32 * i);
7203 break;
7204 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007205 }
7206
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007207 if (*memory_wrapper_idx == 0xFF) {
7208 dev_err(hdev->dev, "ECC error information cannot be found\n");
7209 rc = -EINVAL;
7210 goto enable_clk_gate;
7211 }
7212
7213 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7214 *memory_wrapper_idx);
7215
7216 *ecc_address =
7217 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7218 *ecc_syndrom =
7219 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7220
7221 /* Clear error indication */
7222 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7223 if (params->derr)
7224 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7225 else
7226 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7227
7228 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7229
7230enable_clk_gate:
7231 if (params->disable_clock_gating) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007232 hdev->asic_funcs->set_clock_gating(hdev);
Greg Kroah-Hartman65a9bde62020-07-27 11:49:37 +02007233
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007234 mutex_unlock(&gaudi->clk_gate_mutex);
7235 }
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007236
7237 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007238}
7239
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007240/*
7241 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7242 *
7243 * @idx: the current pi/ci value
7244 * @q_len: the queue length (power of 2)
7245 *
7246 * @return the cyclically decremented index
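 *
 * A minimal illustration, assuming a power-of-2 queue length: with q_len = 8
 * the mask is 0x7, so gaudi_queue_idx_dec(0, 8) returns 7 and
 * gaudi_queue_idx_dec(5, 8) returns 4.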
7247 */
7248static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7249{
7250 u32 mask = q_len - 1;
7251
7252 /*
7253	 * modular decrement is equivalent to adding (queue_len - 1);
7254	 * taking the LSBs afterwards keeps the value in the
7255	 * range [0, queue_len - 1]
7256 */
7257 return (idx + q_len - 1) & mask;
7258}
7259
7260/**
7261 * gaudi_print_sw_config_stream_data - print SW config stream data
7262 *
7263 * @hdev: pointer to the habanalabs device structure
7264 * @stream: the QMAN's stream
7265 * @qman_base: base address of QMAN registers block
7266 */
7267static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7268 u64 qman_base)
7269{
7270 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7271 u32 cq_ptr_lo_off, size;
7272
7273 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7274
7275 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7276 stream * cq_ptr_lo_off;
7277 cq_ptr_hi = cq_ptr_lo +
7278 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7279 cq_tsize = cq_ptr_lo +
7280 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7281
7282 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7283 size = RREG32(cq_tsize);
7284 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7285 stream, cq_ptr, size);
7286}
7287
7288/**
7289 * gaudi_print_last_pqes_on_err - print last PQEs on error
7290 *
7291 * @hdev: pointer to the habanalabs device structure
7292 * @qid_base: first QID of the QMAN (out of 4 streams)
7293 * @stream: the QMAN's stream
7294 * @qman_base: base address of QMAN registers block
7295 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7296 */
7297static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7298 u32 stream, u64 qman_base,
7299 bool pr_sw_conf)
7300{
7301 u32 ci, qm_ci_stream_off, queue_len;
7302 struct hl_hw_queue *q;
7303 u64 pq_ci;
7304 int i;
7305
7306 q = &hdev->kernel_queues[qid_base + stream];
7307
7308 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7309 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7310 stream * qm_ci_stream_off;
7311
7312 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7313 q->int_queue_len : HL_QUEUE_LENGTH;
7314
7315 hdev->asic_funcs->hw_queues_lock(hdev);
7316
7317 if (pr_sw_conf)
7318 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7319
7320 ci = RREG32(pq_ci);
7321
7322	/* we should start printing from ci - 1 */
7323 ci = gaudi_queue_idx_dec(ci, queue_len);
7324
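	/* e.g. (illustrative) if the fetched ci is 2 and the queue holds 256
	 * entries, the walk prints indices 1, 0, 255, ... until
	 * PQ_FETCHER_CACHE_SIZE entries or an uninitialized (len == 0) entry
	 * is reached.
	 */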
7325 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7326 struct hl_bd *bd;
7327 u64 addr;
7328 u32 len;
7329
7330 bd = q->kernel_address;
7331 bd += ci;
7332
7333 len = le32_to_cpu(bd->len);
7334		/* len 0 means an uninitialized entry - break */
7335 if (!len)
7336 break;
7337
7338 addr = le64_to_cpu(bd->ptr);
7339
7340 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7341 stream, ci, addr, len);
7342
7343 /* get previous ci, wrap if needed */
7344 ci = gaudi_queue_idx_dec(ci, queue_len);
7345 }
7346
7347 hdev->asic_funcs->hw_queues_unlock(hdev);
7348}
7349
7350/**
7351 * print_qman_data_on_err - extract QMAN data on error
7352 *
7353 * @hdev: pointer to the habanalabs device structure
7354 * @qid_base: first QID of the QMAN (out of 4 streams)
7355 * @stream: the QMAN's stream
7356 * @qman_base: base address of QMAN registers block
7357 *
 7358 * This function attempts to extract as much data as possible on a QMAN error.
 7359 * For an upper CP, print the SW config stream data and the last 8 PQEs.
 7360 * For the lower CP, print the SW config stream data and the last PQEs of all 4 upper CPs.
7361 */
7362static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7363 u32 stream, u64 qman_base)
7364{
7365 u32 i;
7366
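	/* stream == QMAN_STREAMS denotes the lower CP; any other value is one of the 4 upper-CP streams */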
7367 if (stream != QMAN_STREAMS) {
7368 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7369 true);
7370 return;
7371 }
7372
7373 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7374
7375 for (i = 0; i < QMAN_STREAMS; i++)
7376 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7377 false);
7378}
7379
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007380static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7381 const char *qm_name,
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007382 u64 qman_base,
7383 u32 qid_base)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007384{
7385 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007386 u64 glbl_sts_addr, arb_err_addr;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007387 char reg_desc[32];
7388
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007389 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7390 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7391
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007392 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7393 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7394 glbl_sts_clr_val = 0;
7395 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7396
7397 if (!glbl_sts_val)
7398 continue;
7399
7400 if (i == QMAN_STREAMS)
7401 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7402 else
7403 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7404
7405 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7406 if (glbl_sts_val & BIT(j)) {
7407 dev_err_ratelimited(hdev->dev,
7408 "%s %s. err cause: %s\n",
7409 qm_name, reg_desc,
7410 gaudi_qman_error_cause[j]);
7411 glbl_sts_clr_val |= BIT(j);
7412 }
7413 }
7414
 7415		/* Write 1 to clear errors; if stop-on-error is set, dump the QMAN data instead */
Tomer Tayar1b497152021-04-06 13:32:20 +03007416 if (!hdev->stop_on_err)
7417 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007418 else
7419 print_qman_data_on_err(hdev, qid_base, i, qman_base);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007420 }
7421
7422 arb_err_val = RREG32(arb_err_addr);
7423
7424 if (!arb_err_val)
7425 return;
7426
7427 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7428 if (arb_err_val & BIT(j)) {
7429 dev_err_ratelimited(hdev->dev,
7430 "%s ARB_ERR. err cause: %s\n",
7431 qm_name,
7432 gaudi_qman_arb_error_cause[j]);
7433 }
7434 }
7435}
7436
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007437static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7438 struct hl_eq_sm_sei_data *sei_data)
7439{
7440 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7441
Oded Gabbayae2021d32021-07-12 13:48:56 +03007442 /* Flip the bits as the enum is ordered in the opposite way */
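	/* e.g. GAUDI_EVENT_DMA_IF_SEI_0 yields index 3 and GAUDI_EVENT_DMA_IF_SEI_3 yields index 0 */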
7443 index = (index ^ 0x3) & 0x3;
7444
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007445 switch (sei_data->sei_cause) {
Oded Gabbay78385042021-01-26 22:56:56 +02007446 case SM_SEI_SO_OVERFLOW:
Oded Gabbayae2021d32021-07-12 13:48:56 +03007447 dev_err_ratelimited(hdev->dev,
7448 "%s SEI Error: SOB Group %u overflow/underflow",
7449 gaudi_sync_manager_names[index],
7450 le32_to_cpu(sei_data->sei_log));
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007451 break;
Oded Gabbay78385042021-01-26 22:56:56 +02007452 case SM_SEI_LBW_4B_UNALIGNED:
Oded Gabbayae2021d32021-07-12 13:48:56 +03007453 dev_err_ratelimited(hdev->dev,
7454 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7455 gaudi_sync_manager_names[index],
7456 le32_to_cpu(sei_data->sei_log));
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007457 break;
Oded Gabbay78385042021-01-26 22:56:56 +02007458 case SM_SEI_AXI_RESPONSE_ERR:
Oded Gabbayae2021d32021-07-12 13:48:56 +03007459 dev_err_ratelimited(hdev->dev,
7460 "%s SEI Error: AXI ID %u response error",
7461 gaudi_sync_manager_names[index],
7462 le32_to_cpu(sei_data->sei_log));
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007463 break;
7464 default:
Oded Gabbayae2021d32021-07-12 13:48:56 +03007465 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
Oded Gabbay78385042021-01-26 22:56:56 +02007466 le32_to_cpu(sei_data->sei_log));
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007467 break;
7468 }
7469}
7470
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007471static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7472 struct hl_eq_ecc_data *ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007473{
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007474 struct ecc_info_extract_params params;
7475 u64 ecc_address = 0, ecc_syndrom = 0;
7476 u8 index, memory_wrapper_idx = 0;
7477 bool extract_info_from_fw;
7478 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007479
7480 switch (event_type) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007481 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7482 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7483 extract_info_from_fw = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007484 break;
7485 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7486 index = event_type - GAUDI_EVENT_TPC0_SERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007487 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7488 params.num_memories = 90;
7489 params.derr = false;
7490 params.disable_clock_gating = true;
7491 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007492 break;
7493 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7494 index = event_type - GAUDI_EVENT_TPC0_DERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007495 params.block_address =
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007496 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007497 params.num_memories = 90;
7498 params.derr = true;
7499 params.disable_clock_gating = true;
7500 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007501 break;
7502 case GAUDI_EVENT_MME0_ACC_SERR:
7503 case GAUDI_EVENT_MME1_ACC_SERR:
7504 case GAUDI_EVENT_MME2_ACC_SERR:
7505 case GAUDI_EVENT_MME3_ACC_SERR:
7506 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007507 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7508 params.num_memories = 128;
7509 params.derr = false;
7510 params.disable_clock_gating = true;
7511 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007512 break;
7513 case GAUDI_EVENT_MME0_ACC_DERR:
7514 case GAUDI_EVENT_MME1_ACC_DERR:
7515 case GAUDI_EVENT_MME2_ACC_DERR:
7516 case GAUDI_EVENT_MME3_ACC_DERR:
7517 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007518 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7519 params.num_memories = 128;
7520 params.derr = true;
7521 params.disable_clock_gating = true;
7522 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007523 break;
7524 case GAUDI_EVENT_MME0_SBAB_SERR:
7525 case GAUDI_EVENT_MME1_SBAB_SERR:
7526 case GAUDI_EVENT_MME2_SBAB_SERR:
7527 case GAUDI_EVENT_MME3_SBAB_SERR:
7528 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007529 params.block_address =
7530 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7531 params.num_memories = 33;
7532 params.derr = false;
7533 params.disable_clock_gating = true;
7534 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007535 break;
7536 case GAUDI_EVENT_MME0_SBAB_DERR:
7537 case GAUDI_EVENT_MME1_SBAB_DERR:
7538 case GAUDI_EVENT_MME2_SBAB_DERR:
7539 case GAUDI_EVENT_MME3_SBAB_DERR:
7540 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007541 params.block_address =
7542 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7543 params.num_memories = 33;
7544 params.derr = true;
7545 params.disable_clock_gating = true;
Oded Gabbay652b4442020-11-21 14:35:35 +02007546 extract_info_from_fw = false;
7547 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007548 default:
7549 return;
7550 }
7551
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007552 if (extract_info_from_fw) {
7553 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7554 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7555 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7556 } else {
7557 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7558 &ecc_syndrom, &memory_wrapper_idx);
7559 if (rc)
7560 return;
7561 }
7562
7563 dev_err(hdev->dev,
7564 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7565 ecc_address, ecc_syndrom, memory_wrapper_idx);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007566}
7567
7568static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7569{
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007570 u64 qman_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007571 char desc[32];
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007572 u32 qid_base;
7573 u8 index;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007574
7575 switch (event_type) {
7576 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7577 index = event_type - GAUDI_EVENT_TPC0_QM;
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007578 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7579 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007580 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7581 break;
7582 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7583 index = event_type - GAUDI_EVENT_MME0_QM;
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007584 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7585 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007586 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7587 break;
7588 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7589 index = event_type - GAUDI_EVENT_DMA0_QM;
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007590 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7591 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7592 if (index > 1)
7593 qid_base++;
7594 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007595 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7596 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02007597 case GAUDI_EVENT_NIC0_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007598 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7599 qman_base = mmNIC0_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007600 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7601 break;
7602 case GAUDI_EVENT_NIC0_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007603 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7604 qman_base = mmNIC0_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007605 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7606 break;
7607 case GAUDI_EVENT_NIC1_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007608 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7609 qman_base = mmNIC1_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007610 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7611 break;
7612 case GAUDI_EVENT_NIC1_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007613 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7614 qman_base = mmNIC1_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007615 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7616 break;
7617 case GAUDI_EVENT_NIC2_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007618 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7619 qman_base = mmNIC2_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007620 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7621 break;
7622 case GAUDI_EVENT_NIC2_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007623 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7624 qman_base = mmNIC2_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007625 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7626 break;
7627 case GAUDI_EVENT_NIC3_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007628 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7629 qman_base = mmNIC3_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007630 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7631 break;
7632 case GAUDI_EVENT_NIC3_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007633 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7634 qman_base = mmNIC3_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007635 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7636 break;
7637 case GAUDI_EVENT_NIC4_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007638 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7639 qman_base = mmNIC4_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007640 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7641 break;
7642 case GAUDI_EVENT_NIC4_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007643 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7644 qman_base = mmNIC4_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007645 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7646 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007647 default:
7648 return;
7649 }
7650
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007651 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007652}
7653
7654static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7655 bool razwi)
7656{
Ofir Bittonebd8d122020-05-10 13:41:28 +03007657 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007658
7659 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7660 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7661 event_type, desc);
7662
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007663 if (razwi) {
7664 gaudi_print_razwi_info(hdev);
7665 gaudi_print_mmu_error_info(hdev);
7666 }
7667}
7668
Ohad Sharabi5d6a1982021-02-08 14:53:56 +02007669static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7670 struct cpucp_pkt_sync_err *sync_err)
7671{
7672 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7673
7674 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7675 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7676}
7677
Ofir Bitton254fac62021-06-02 11:56:31 +03007678static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7679 struct hl_eq_fw_alive *fw_alive)
7680{
7681 dev_err(hdev->dev,
7682 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7683 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7684 "Minor" : "Critical", fw_alive->process_id,
7685 fw_alive->thread_id, fw_alive->uptime_seconds);
7686}
7687
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007688static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7689{
Ofir Bittonebd8d122020-05-10 13:41:28 +03007690 struct gaudi_device *gaudi = hdev->asic_specific;
7691
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007692 /* Unmask all IRQs since some could have been received
7693 * during the soft reset
7694 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03007695 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007696}
7697
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007698static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7699 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007700{
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007701 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
Oded Gabbayf1a29772021-06-06 11:38:12 +03007702 int rc = 0;
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007703
Ohad Sharabi6a785e32021-05-29 23:26:10 +03007704 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7705 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007706 if (!hbm_ecc_data) {
7707 dev_err(hdev->dev, "No FW ECC data");
7708 return 0;
7709 }
7710
7711 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7712 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7713 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7714 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7715 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7716 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7717 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7718 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7719 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7720 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7721 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7722 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7723 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7724 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7725
7726 dev_err(hdev->dev,
Ohad Sharabib520ca52021-01-27 15:42:53 +02007727 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7728 device, ch, wr_par, rd_par, ca_par, serr, derr);
7729 dev_err(hdev->dev,
7730 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7731 device, ch, hbm_ecc_data->first_addr, type,
7732 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7733 hbm_ecc_data->dec_cnt);
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007734 return 0;
7735 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007736
Ohad Sharabi4cb45082021-05-20 09:09:03 +03007737 if (hdev->asic_prop.fw_security_enabled) {
Ohad Sharabib520ca52021-01-27 15:42:53 +02007738 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7739 return 0;
7740 }
7741
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007742 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7743 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7744 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
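		/* fold the two status bytes together - a set bit in either byte indicates an interrupt to report */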
7745 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7746 if (val) {
Oded Gabbayf1a29772021-06-06 11:38:12 +03007747 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007748 dev_err(hdev->dev,
7749 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7750 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7751 (val >> 2) & 0x1, (val >> 3) & 0x1,
7752 (val >> 4) & 0x1);
7753
7754 val2 = RREG32(base + ch * 0x1000 + 0x060);
7755 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007756 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007757 device, ch * 2,
7758 RREG32(base + ch * 0x1000 + 0x064),
7759 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7760 (val2 & 0xFF0000) >> 16,
7761 (val2 & 0xFF000000) >> 24);
7762 }
7763
7764 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7765 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7766 if (val) {
Oded Gabbayf1a29772021-06-06 11:38:12 +03007767 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007768 dev_err(hdev->dev,
7769 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7770 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7771 (val >> 2) & 0x1, (val >> 3) & 0x1,
7772 (val >> 4) & 0x1);
7773
7774 val2 = RREG32(base + ch * 0x1000 + 0x070);
7775 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007776 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007777 device, ch * 2 + 1,
7778 RREG32(base + ch * 0x1000 + 0x074),
7779 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7780 (val2 & 0xFF0000) >> 16,
7781 (val2 & 0xFF000000) >> 24);
7782 }
7783
7784 /* Clear interrupts */
7785 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7786 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7787 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7788 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7789 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7790 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7791 }
7792
7793 val = RREG32(base + 0x8F30);
7794 val2 = RREG32(base + 0x8F34);
7795 if (val | val2) {
Oded Gabbayf1a29772021-06-06 11:38:12 +03007796 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007797 dev_err(hdev->dev,
7798 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7799 device, val, val2);
7800 }
7801 val = RREG32(base + 0x8F40);
7802 val2 = RREG32(base + 0x8F44);
7803 if (val | val2) {
Oded Gabbayf1a29772021-06-06 11:38:12 +03007804 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007805 dev_err(hdev->dev,
7806 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7807 device, val, val2);
7808 }
7809
Oded Gabbayf1a29772021-06-06 11:38:12 +03007810 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007811}
7812
7813static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7814{
7815 switch (hbm_event_type) {
7816 case GAUDI_EVENT_HBM0_SPI_0:
7817 case GAUDI_EVENT_HBM0_SPI_1:
7818 return 0;
7819 case GAUDI_EVENT_HBM1_SPI_0:
7820 case GAUDI_EVENT_HBM1_SPI_1:
7821 return 1;
7822 case GAUDI_EVENT_HBM2_SPI_0:
7823 case GAUDI_EVENT_HBM2_SPI_1:
7824 return 2;
7825 case GAUDI_EVENT_HBM3_SPI_0:
7826 case GAUDI_EVENT_HBM3_SPI_1:
7827 return 3;
7828 default:
7829 break;
7830 }
7831
7832 /* Should never happen */
7833 return 0;
7834}
7835
7836static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7837 char *interrupt_name)
7838{
7839 struct gaudi_device *gaudi = hdev->asic_specific;
7840 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7841 bool soft_reset_required = false;
7842
7843 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
Oded Gabbay6138bbe2020-09-04 20:18:16 +03007844 * gating, and thus cannot be done in CPU-CP and should be done instead
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007845 * by the driver.
7846 */
7847
7848 mutex_lock(&gaudi->clk_gate_mutex);
7849
7850 hdev->asic_funcs->disable_clock_gating(hdev);
7851
7852 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7853 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7854
7855 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7856 if (tpc_interrupts_cause & BIT(i)) {
7857 dev_err_ratelimited(hdev->dev,
7858 "TPC%d_%s interrupt cause: %s\n",
7859 tpc_id, interrupt_name,
7860 gaudi_tpc_interrupts_cause[i]);
 7861			/* If this is a QM error, we need to soft-reset */
7862 if (i == 15)
7863 soft_reset_required = true;
7864 }
7865
7866 /* Clear interrupts */
7867 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7868
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007869 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007870
7871 mutex_unlock(&gaudi->clk_gate_mutex);
7872
7873 return soft_reset_required;
7874}
7875
7876static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7877{
7878 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7879}
7880
7881static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7882{
7883 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7884}
7885
7886static void gaudi_print_clk_change_info(struct hl_device *hdev,
7887 u16 event_type)
7888{
7889 switch (event_type) {
7890 case GAUDI_EVENT_FIX_POWER_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007891 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007892 dev_info_ratelimited(hdev->dev,
7893 "Clock throttling due to power consumption\n");
7894 break;
7895
7896 case GAUDI_EVENT_FIX_POWER_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007897 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007898 dev_info_ratelimited(hdev->dev,
7899 "Power envelop is safe, back to optimal clock\n");
7900 break;
7901
7902 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007903 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007904 dev_info_ratelimited(hdev->dev,
7905 "Clock throttling due to overheating\n");
7906 break;
7907
7908 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007909 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007910 dev_info_ratelimited(hdev->dev,
7911 "Thermal envelop is safe, back to optimal clock\n");
7912 break;
7913
7914 default:
7915 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7916 event_type);
7917 break;
7918 }
7919}
7920
7921static void gaudi_handle_eqe(struct hl_device *hdev,
7922 struct hl_eq_entry *eq_entry)
7923{
7924 struct gaudi_device *gaudi = hdev->asic_specific;
7925 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7926 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7927 >> EQ_CTL_EVENT_TYPE_SHIFT);
Oded Gabbay66446822020-05-18 16:48:01 +03007928 bool reset_required;
Ofir Bitton7148e642021-07-12 14:18:30 +03007929 u8 cause;
7930 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007931
7932 gaudi->events_stat[event_type]++;
7933 gaudi->events_stat_aggregate[event_type]++;
7934
7935 switch (event_type) {
7936 case GAUDI_EVENT_PCIE_CORE_DERR:
7937 case GAUDI_EVENT_PCIE_IF_DERR:
7938 case GAUDI_EVENT_PCIE_PHY_DERR:
7939 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7940 case GAUDI_EVENT_MME0_ACC_DERR:
7941 case GAUDI_EVENT_MME0_SBAB_DERR:
7942 case GAUDI_EVENT_MME1_ACC_DERR:
7943 case GAUDI_EVENT_MME1_SBAB_DERR:
7944 case GAUDI_EVENT_MME2_ACC_DERR:
7945 case GAUDI_EVENT_MME2_SBAB_DERR:
7946 case GAUDI_EVENT_MME3_ACC_DERR:
7947 case GAUDI_EVENT_MME3_SBAB_DERR:
7948 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7949 fallthrough;
7950 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7951 case GAUDI_EVENT_PSOC_MEM_DERR:
7952 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7953 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7954 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007955 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7956 case GAUDI_EVENT_MMU_DERR:
Ofir Bitton6c31f4942021-06-17 09:52:55 +03007957 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007958 gaudi_print_irq_info(hdev, event_type, true);
7959 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02007960 goto reset_device;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007961
7962 case GAUDI_EVENT_GIC500:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007963 case GAUDI_EVENT_AXI_ECC:
7964 case GAUDI_EVENT_L2_RAM_ECC:
7965 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7966 gaudi_print_irq_info(hdev, event_type, false);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02007967 goto reset_device;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007968
7969 case GAUDI_EVENT_HBM0_SPI_0:
7970 case GAUDI_EVENT_HBM1_SPI_0:
7971 case GAUDI_EVENT_HBM2_SPI_0:
7972 case GAUDI_EVENT_HBM3_SPI_0:
7973 gaudi_print_irq_info(hdev, event_type, false);
7974 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007975 gaudi_hbm_event_to_dev(event_type),
7976 &eq_entry->hbm_ecc_data);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02007977 goto reset_device;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007978
7979 case GAUDI_EVENT_HBM0_SPI_1:
7980 case GAUDI_EVENT_HBM1_SPI_1:
7981 case GAUDI_EVENT_HBM2_SPI_1:
7982 case GAUDI_EVENT_HBM3_SPI_1:
7983 gaudi_print_irq_info(hdev, event_type, false);
7984 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007985 gaudi_hbm_event_to_dev(event_type),
7986 &eq_entry->hbm_ecc_data);
Oded Gabbay230cd892021-01-26 22:58:13 +02007987 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007988 break;
7989
7990 case GAUDI_EVENT_TPC0_DEC:
7991 case GAUDI_EVENT_TPC1_DEC:
7992 case GAUDI_EVENT_TPC2_DEC:
7993 case GAUDI_EVENT_TPC3_DEC:
7994 case GAUDI_EVENT_TPC4_DEC:
7995 case GAUDI_EVENT_TPC5_DEC:
7996 case GAUDI_EVENT_TPC6_DEC:
7997 case GAUDI_EVENT_TPC7_DEC:
7998 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007999 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008000 tpc_dec_event_to_tpc_id(event_type),
8001 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03008002 if (reset_required) {
8003 dev_err(hdev->dev, "hard reset required due to %s\n",
8004 gaudi_irq_map_table[event_type].name);
8005
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008006 goto reset_device;
Oded Gabbay66446822020-05-18 16:48:01 +03008007 } else {
8008 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03008009 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008010 break;
8011
8012 case GAUDI_EVENT_TPC0_KRN_ERR:
8013 case GAUDI_EVENT_TPC1_KRN_ERR:
8014 case GAUDI_EVENT_TPC2_KRN_ERR:
8015 case GAUDI_EVENT_TPC3_KRN_ERR:
8016 case GAUDI_EVENT_TPC4_KRN_ERR:
8017 case GAUDI_EVENT_TPC5_KRN_ERR:
8018 case GAUDI_EVENT_TPC6_KRN_ERR:
8019 case GAUDI_EVENT_TPC7_KRN_ERR:
8020 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03008021 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008022 tpc_krn_event_to_tpc_id(event_type),
8023 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03008024 if (reset_required) {
8025 dev_err(hdev->dev, "hard reset required due to %s\n",
8026 gaudi_irq_map_table[event_type].name);
8027
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008028 goto reset_device;
Oded Gabbay66446822020-05-18 16:48:01 +03008029 } else {
8030 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03008031 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008032 break;
8033
8034 case GAUDI_EVENT_PCIE_CORE_SERR:
8035 case GAUDI_EVENT_PCIE_IF_SERR:
8036 case GAUDI_EVENT_PCIE_PHY_SERR:
8037 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8038 case GAUDI_EVENT_MME0_ACC_SERR:
8039 case GAUDI_EVENT_MME0_SBAB_SERR:
8040 case GAUDI_EVENT_MME1_ACC_SERR:
8041 case GAUDI_EVENT_MME1_SBAB_SERR:
8042 case GAUDI_EVENT_MME2_ACC_SERR:
8043 case GAUDI_EVENT_MME2_SBAB_SERR:
8044 case GAUDI_EVENT_MME3_ACC_SERR:
8045 case GAUDI_EVENT_MME3_SBAB_SERR:
8046 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8047 case GAUDI_EVENT_CPU_IF_ECC_SERR:
8048 case GAUDI_EVENT_PSOC_MEM_SERR:
8049 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8050 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8051 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8052 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8053 fallthrough;
8054 case GAUDI_EVENT_MMU_SERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03008055 gaudi_print_irq_info(hdev, event_type, true);
8056 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8057 hl_fw_unmask_irq(hdev, event_type);
8058 break;
8059
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008060 case GAUDI_EVENT_PCIE_DEC:
8061 case GAUDI_EVENT_MME0_WBC_RSP:
8062 case GAUDI_EVENT_MME0_SBAB0_RSP:
8063 case GAUDI_EVENT_MME1_WBC_RSP:
8064 case GAUDI_EVENT_MME1_SBAB0_RSP:
8065 case GAUDI_EVENT_MME2_WBC_RSP:
8066 case GAUDI_EVENT_MME2_SBAB0_RSP:
8067 case GAUDI_EVENT_MME3_WBC_RSP:
8068 case GAUDI_EVENT_MME3_SBAB0_RSP:
8069 case GAUDI_EVENT_CPU_AXI_SPLITTER:
8070 case GAUDI_EVENT_PSOC_AXI_DEC:
8071 case GAUDI_EVENT_PSOC_PRSTN_FALL:
8072 case GAUDI_EVENT_MMU_PAGE_FAULT:
8073 case GAUDI_EVENT_MMU_WR_PERM:
8074 case GAUDI_EVENT_RAZWI_OR_ADC:
8075 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8076 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8077 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8078 fallthrough;
Oded Gabbay3c681572020-11-02 21:10:39 +02008079 case GAUDI_EVENT_NIC0_QM0:
8080 case GAUDI_EVENT_NIC0_QM1:
8081 case GAUDI_EVENT_NIC1_QM0:
8082 case GAUDI_EVENT_NIC1_QM1:
8083 case GAUDI_EVENT_NIC2_QM0:
8084 case GAUDI_EVENT_NIC2_QM1:
8085 case GAUDI_EVENT_NIC3_QM0:
8086 case GAUDI_EVENT_NIC3_QM1:
8087 case GAUDI_EVENT_NIC4_QM0:
8088 case GAUDI_EVENT_NIC4_QM1:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008089 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8090 gaudi_print_irq_info(hdev, event_type, true);
8091 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03008092 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008093 break;
8094
8095 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8096 gaudi_print_irq_info(hdev, event_type, true);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008097 goto reset_device;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008098
8099 case GAUDI_EVENT_TPC0_BMON_SPMU:
8100 case GAUDI_EVENT_TPC1_BMON_SPMU:
8101 case GAUDI_EVENT_TPC2_BMON_SPMU:
8102 case GAUDI_EVENT_TPC3_BMON_SPMU:
8103 case GAUDI_EVENT_TPC4_BMON_SPMU:
8104 case GAUDI_EVENT_TPC5_BMON_SPMU:
8105 case GAUDI_EVENT_TPC6_BMON_SPMU:
8106 case GAUDI_EVENT_TPC7_BMON_SPMU:
8107 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8108 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03008109 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008110 break;
8111
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02008112 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8113 gaudi_print_irq_info(hdev, event_type, false);
8114 gaudi_print_sm_sei_info(hdev, event_type,
8115 &eq_entry->sm_sei_data);
Ofir Bitton7148e642021-07-12 14:18:30 +03008116 rc = hl_state_dump(hdev);
8117 if (rc)
8118 dev_err(hdev->dev,
8119 "Error during system state dump %d\n", rc);
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02008120 hl_fw_unmask_irq(hdev, event_type);
8121 break;
8122
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008123 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8124 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03008125 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008126 break;
8127
8128 case GAUDI_EVENT_PSOC_GPIO_U16_0:
8129 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8130 dev_err(hdev->dev,
8131 "Received high temp H/W interrupt %d (cause %d)\n",
8132 event_type, cause);
8133 break;
8134
Ofir Bittond661d792021-03-09 14:45:04 +02008135 case GAUDI_EVENT_DEV_RESET_REQ:
Ofir Bitton2ea09532021-03-03 13:23:47 +02008136 gaudi_print_irq_info(hdev, event_type, false);
8137 goto reset_device;
8138
Ohad Sharabi5d6a1982021-02-08 14:53:56 +02008139 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8140 gaudi_print_irq_info(hdev, event_type, false);
8141 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008142 goto reset_device;
Ohad Sharabi5d6a1982021-02-08 14:53:56 +02008143
Ofir Bitton254fac62021-06-02 11:56:31 +03008144 case GAUDI_EVENT_FW_ALIVE_S:
8145 gaudi_print_irq_info(hdev, event_type, false);
8146 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8147 goto reset_device;
8148
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008149 default:
8150 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8151 event_type);
8152 break;
8153 }
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008154
8155 return;
8156
8157reset_device:
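	/* hard-reset the device when FW events are allowed to trigger resets; otherwise just unmask the event IRQ again */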
8158 if (hdev->hard_reset_on_fw_events)
8159 hl_device_reset(hdev, HL_RESET_HARD);
8160 else
8161 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008162}
8163
8164static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8165 u32 *size)
8166{
8167 struct gaudi_device *gaudi = hdev->asic_specific;
8168
8169 if (aggregate) {
8170 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8171 return gaudi->events_stat_aggregate;
8172 }
8173
8174 *size = (u32) sizeof(gaudi->events_stat);
8175 return gaudi->events_stat;
8176}
8177
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008178static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008179 u32 flags)
8180{
8181 struct gaudi_device *gaudi = hdev->asic_specific;
8182 u32 status, timeout_usec;
8183 int rc;
8184
8185 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8186 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008187 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008188
8189 if (hdev->pldm)
8190 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8191 else
8192 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8193
8194 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03008195 WREG32(mmSTLB_INV_PS, 3);
8196 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03008197 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008198
8199 rc = hl_poll_timeout(
8200 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03008201 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008202 status,
8203 !status,
8204 1000,
8205 timeout_usec);
8206
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03008207 WREG32(mmSTLB_INV_SET, 0);
8208
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008209 if (rc) {
8210 dev_err_ratelimited(hdev->dev,
8211 "MMU cache invalidation timeout\n");
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008212 hl_device_reset(hdev, HL_RESET_HARD);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008213 }
8214
8215 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008216}
8217
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008218static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Alon Mizrahi08c03a12021-04-08 15:30:59 +03008219 bool is_hard, u32 flags,
8220 u32 asid, u64 va, u64 size)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008221{
Alon Mizrahi08c03a12021-04-08 15:30:59 +03008222 /* Treat as invalidate all because there is no range invalidation
8223 * in Gaudi
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008224 */
Alon Mizrahi08c03a12021-04-08 15:30:59 +03008225 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008226}
8227
8228static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8229 u32 asid, u64 phys_addr)
8230{
8231 u32 status, timeout_usec;
8232 int rc;
8233
8234 if (hdev->pldm)
8235 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8236 else
8237 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8238
8239 WREG32(MMU_ASID, asid);
8240 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8241 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8242 WREG32(MMU_BUSY, 0x80000000);
8243
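	/* wait for the MMU to clear the busy bit, indicating the hop0 update has completed */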
8244 rc = hl_poll_timeout(
8245 hdev,
8246 MMU_BUSY,
8247 status,
8248 !(status & 0x80000000),
8249 1000,
8250 timeout_usec);
8251
8252 if (rc) {
8253 dev_err(hdev->dev,
8254 "Timeout during MMU hop0 config of asid %d\n", asid);
8255 return rc;
8256 }
8257
8258 return 0;
8259}
8260
8261static int gaudi_send_heartbeat(struct hl_device *hdev)
8262{
8263 struct gaudi_device *gaudi = hdev->asic_specific;
8264
8265 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8266 return 0;
8267
8268 return hl_fw_send_heartbeat(hdev);
8269}
8270
Oded Gabbay2f553422020-08-15 16:28:10 +03008271static int gaudi_cpucp_info_get(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008272{
8273 struct gaudi_device *gaudi = hdev->asic_specific;
8274 struct asic_fixed_properties *prop = &hdev->asic_prop;
8275 int rc;
8276
8277 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8278 return 0;
8279
Ohad Sharabie67a60402021-05-02 15:45:21 +03008280 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8281 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8282 mmCPU_BOOT_ERR1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008283 if (rc)
8284 return rc;
8285
Oded Gabbay2f553422020-08-15 16:28:10 +03008286 if (!strlen(prop->cpucp_info.card_name))
8287 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008288 CARD_NAME_MAX_LEN);
8289
Oded Gabbay2f553422020-08-15 16:28:10 +03008290 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
Oded Gabbay58361aa2020-08-08 23:34:47 +03008291
Koby Elbazcd5def82021-02-23 21:31:27 +02008292 set_default_power_values(hdev);
Oded Gabbay58361aa2020-08-08 23:34:47 +03008293
8294 hdev->max_power = prop->max_power_default;
8295
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008296 return 0;
8297}
8298
Ohad Sharabicf303392021-01-17 16:01:56 +02008299static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8300 u8 mask_len, struct seq_file *s)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008301{
8302 struct gaudi_device *gaudi = hdev->asic_specific;
8303 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8304 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
Oded Gabbay3c681572020-11-02 21:10:39 +02008305 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
Ohad Sharabicf303392021-01-17 16:01:56 +02008306 unsigned long *mask = (unsigned long *)mask_arr;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008307 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8308 bool is_idle = true, is_eng_idle, is_slave;
8309 u64 offset;
Oded Gabbay3c681572020-11-02 21:10:39 +02008310 int i, dma_id, port;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008311
8312 mutex_lock(&gaudi->clk_gate_mutex);
8313
8314 hdev->asic_funcs->disable_clock_gating(hdev);
8315
8316 if (s)
8317 seq_puts(s,
8318 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8319 "--- ------- ------------ ---------- -------------\n");
8320
8321 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8322 dma_id = gaudi_dma_assignment[i];
8323 offset = dma_id * DMA_QMAN_OFFSET;
8324
8325 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8326 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8327 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8328 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8329 IS_DMA_IDLE(dma_core_sts0);
8330 is_idle &= is_eng_idle;
8331
Ohad Sharabicf303392021-01-17 16:01:56 +02008332 if (mask && !is_eng_idle)
8333 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008334 if (s)
8335 seq_printf(s, fmt, dma_id,
8336 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8337 qm_cgm_sts, dma_core_sts0);
8338 }
8339
8340 if (s)
8341 seq_puts(s,
8342 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8343 "--- ------- ------------ ---------- ----------\n");
8344
8345 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8346 offset = i * TPC_QMAN_OFFSET;
8347 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8348 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8349 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8350 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8351 IS_TPC_IDLE(tpc_cfg_sts);
8352 is_idle &= is_eng_idle;
8353
Ohad Sharabicf303392021-01-17 16:01:56 +02008354 if (mask && !is_eng_idle)
8355 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008356 if (s)
8357 seq_printf(s, fmt, i,
8358 is_eng_idle ? "Y" : "N",
8359 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8360 }
8361
8362 if (s)
8363 seq_puts(s,
8364 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8365 "--- ------- ------------ ---------- -----------\n");
8366
8367 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8368 offset = i * MME_QMAN_OFFSET;
8369 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8370 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8371
8372 /* MME 1 & 3 are slaves, no need to check their QMANs */
8373 is_slave = i % 2;
8374 if (!is_slave) {
8375 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8376 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8377 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8378 }
8379
8380 is_idle &= is_eng_idle;
8381
Ohad Sharabicf303392021-01-17 16:01:56 +02008382 if (mask && !is_eng_idle)
8383 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008384 if (s) {
8385 if (!is_slave)
8386 seq_printf(s, fmt, i,
8387 is_eng_idle ? "Y" : "N",
8388 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8389 else
8390 seq_printf(s, mme_slave_fmt, i,
8391 is_eng_idle ? "Y" : "N", "-",
8392 "-", mme_arch_sts);
8393 }
8394 }
8395
8396 if (s)
Oded Gabbay3c681572020-11-02 21:10:39 +02008397 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8398 "--- ------- ------------ ----------\n");
8399
8400 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8401 offset = i * NIC_MACRO_QMAN_OFFSET;
8402 port = 2 * i;
Oded Gabbay90810212021-05-25 21:35:13 +03008403 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
Oded Gabbay3c681572020-11-02 21:10:39 +02008404 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8405 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8406 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8407 is_idle &= is_eng_idle;
8408
Ohad Sharabicf303392021-01-17 16:01:56 +02008409 if (mask && !is_eng_idle)
8410 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
Oded Gabbay3c681572020-11-02 21:10:39 +02008411 if (s)
8412 seq_printf(s, nic_fmt, port,
8413 is_eng_idle ? "Y" : "N",
8414 qm_glbl_sts0, qm_cgm_sts);
8415 }
8416
8417 port = 2 * i + 1;
Oded Gabbay90810212021-05-25 21:35:13 +03008418 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
Oded Gabbay3c681572020-11-02 21:10:39 +02008419 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8420 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8421 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8422 is_idle &= is_eng_idle;
8423
Ohad Sharabicf303392021-01-17 16:01:56 +02008424 if (mask && !is_eng_idle)
8425 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
Oded Gabbay3c681572020-11-02 21:10:39 +02008426 if (s)
8427 seq_printf(s, nic_fmt, port,
8428 is_eng_idle ? "Y" : "N",
8429 qm_glbl_sts0, qm_cgm_sts);
8430 }
8431 }
8432
8433 if (s)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008434 seq_puts(s, "\n");
8435
Oded Gabbaye38bfd32020-07-03 20:46:12 +03008436 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008437
8438 mutex_unlock(&gaudi->clk_gate_mutex);
8439
8440 return is_idle;
8441}
8442
8443static void gaudi_hw_queues_lock(struct hl_device *hdev)
8444 __acquires(&gaudi->hw_queues_lock)
8445{
8446 struct gaudi_device *gaudi = hdev->asic_specific;
8447
8448 spin_lock(&gaudi->hw_queues_lock);
8449}
8450
8451static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8452 __releases(&gaudi->hw_queues_lock)
8453{
8454 struct gaudi_device *gaudi = hdev->asic_specific;
8455
8456 spin_unlock(&gaudi->hw_queues_lock);
8457}
8458
8459static u32 gaudi_get_pci_id(struct hl_device *hdev)
8460{
8461 return hdev->pdev->device;
8462}
8463
8464static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8465 size_t max_size)
8466{
8467 struct gaudi_device *gaudi = hdev->asic_specific;
8468
8469 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8470 return 0;
8471
8472 return hl_fw_get_eeprom_data(hdev, data, max_size);
8473}
8474
8475/*
8476 * this function should be used only during initialization and/or after reset,
8477 * when there are no active users.
8478 */
8479static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8480 u32 tpc_id)
8481{
8482 struct gaudi_device *gaudi = hdev->asic_specific;
8483 u64 kernel_timeout;
8484 u32 status, offset;
8485 int rc;
8486
8487 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8488
8489 if (hdev->pldm)
8490 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8491 else
8492 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8493
8494 mutex_lock(&gaudi->clk_gate_mutex);
8495
8496 hdev->asic_funcs->disable_clock_gating(hdev);
8497
8498 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8499 lower_32_bits(tpc_kernel));
8500 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8501 upper_32_bits(tpc_kernel));
8502
8503 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8504 lower_32_bits(tpc_kernel));
8505 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8506 upper_32_bits(tpc_kernel));
8507 /* set a valid LUT pointer, content is of no significance */
8508 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8509 lower_32_bits(tpc_kernel));
8510 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8511 upper_32_bits(tpc_kernel));
8512
8513 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8514 lower_32_bits(CFG_BASE +
8515 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8516
8517 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8518 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8519 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8520 /* wait a bit for the engine to start executing */
8521 usleep_range(1000, 1500);
8522
8523 /* wait until engine has finished executing */
8524 rc = hl_poll_timeout(
8525 hdev,
8526 mmTPC0_CFG_STATUS + offset,
8527 status,
8528 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8529 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8530 1000,
8531 kernel_timeout);
8532
8533 if (rc) {
8534 dev_err(hdev->dev,
8535 "Timeout while waiting for TPC%d icache prefetch\n",
8536 tpc_id);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03008537 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008538 mutex_unlock(&gaudi->clk_gate_mutex);
8539 return -EIO;
8540 }
8541
8542 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8543 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8544
8545 /* wait a bit for the engine to start executing */
8546 usleep_range(1000, 1500);
8547
8548 /* wait until engine has finished executing */
8549 rc = hl_poll_timeout(
8550 hdev,
8551 mmTPC0_CFG_STATUS + offset,
8552 status,
8553 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8554 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8555 1000,
8556 kernel_timeout);
8557
Oded Gabbay31ac1f12020-08-12 11:28:13 +03008558 if (rc) {
8559 dev_err(hdev->dev,
8560 "Timeout while waiting for TPC%d vector pipe\n",
8561 tpc_id);
8562 hdev->asic_funcs->set_clock_gating(hdev);
8563 mutex_unlock(&gaudi->clk_gate_mutex);
8564 return -EIO;
8565 }
8566
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008567 rc = hl_poll_timeout(
8568 hdev,
8569 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8570 status,
8571 (status == 0),
8572 1000,
8573 kernel_timeout);
8574
Oded Gabbaye38bfd32020-07-03 20:46:12 +03008575 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008576 mutex_unlock(&gaudi->clk_gate_mutex);
8577
8578 if (rc) {
8579 dev_err(hdev->dev,
8580 "Timeout while waiting for TPC%d kernel to execute\n",
8581 tpc_id);
8582 return -EIO;
8583 }
8584
8585 return 0;
8586}
8587
Ofir Bitton5de406c2020-09-10 10:56:26 +03008588static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8589 struct hl_ctx *ctx)
8590{
8591 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bitton5de406c2020-09-10 10:56:26 +03008592 int min_alloc_order, rc, collective_cb_size;
8593
8594 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8595 return 0;
8596
8597 hdev->internal_cb_pool_virt_addr =
8598 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8599 HOST_SPACE_INTERNAL_CB_SZ,
8600 &hdev->internal_cb_pool_dma_addr,
8601 GFP_KERNEL | __GFP_ZERO);
8602
8603 if (!hdev->internal_cb_pool_virt_addr)
8604 return -ENOMEM;
8605
8606 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8607 sizeof(struct packet_fence);
8608 min_alloc_order = ilog2(collective_cb_size);
8609
8610 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8611 if (!hdev->internal_cb_pool) {
8612 dev_err(hdev->dev,
8613 "Failed to create internal CB pool\n");
8614 rc = -ENOMEM;
8615 goto free_internal_cb_pool;
8616 }
8617
8618 rc = gen_pool_add(hdev->internal_cb_pool,
8619 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8620 HOST_SPACE_INTERNAL_CB_SZ, -1);
8621 if (rc) {
8622 dev_err(hdev->dev,
8623 "Failed to add memory to internal CB pool\n");
8624 rc = -EFAULT;
8625 goto destroy_internal_cb_pool;
8626 }
8627
Ofir Bittonbe91b912020-10-22 15:04:10 +03008628 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
Ofir Bitton412c41f2020-11-04 15:18:55 +02008629 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8630 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
Ofir Bittonbe91b912020-10-22 15:04:10 +03008631
Koby Elbaz1f7ef4b2021-06-10 09:14:43 +03008632 if (!hdev->internal_cb_va_base) {
8633 rc = -ENOMEM;
Ofir Bittonbe91b912020-10-22 15:04:10 +03008634 goto destroy_internal_cb_pool;
Koby Elbaz1f7ef4b2021-06-10 09:14:43 +03008635 }
Ofir Bitton5de406c2020-09-10 10:56:26 +03008636
8637 mutex_lock(&ctx->mmu_lock);
Ofir Bitton5c054872020-10-22 15:13:10 +03008638 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8639 hdev->internal_cb_pool_dma_addr,
8640 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008641
8642 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008643 mutex_unlock(&ctx->mmu_lock);
8644
Ofir Bitton5c054872020-10-22 15:13:10 +03008645 if (rc)
8646 goto unreserve_internal_cb_pool;
8647
Ofir Bitton5de406c2020-09-10 10:56:26 +03008648 return 0;
8649
Ofir Bitton5c054872020-10-22 15:13:10 +03008650unreserve_internal_cb_pool:
Ofir Bittonbe91b912020-10-22 15:04:10 +03008651 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8652 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008653destroy_internal_cb_pool:
8654 gen_pool_destroy(hdev->internal_cb_pool);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008655free_internal_cb_pool:
8656 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8657 HOST_SPACE_INTERNAL_CB_SZ,
8658 hdev->internal_cb_pool_virt_addr,
8659 hdev->internal_cb_pool_dma_addr);
8660
8661 return rc;
8662}
8663
8664static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8665 struct hl_ctx *ctx)
8666{
8667 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bitton5de406c2020-09-10 10:56:26 +03008668
8669 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8670 return;
8671
8672 mutex_lock(&ctx->mmu_lock);
Ofir Bitton5c054872020-10-22 15:13:10 +03008673 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8674 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bittonbe91b912020-10-22 15:04:10 +03008675 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8676 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008677 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008678 mutex_unlock(&ctx->mmu_lock);
8679
8680 gen_pool_destroy(hdev->internal_cb_pool);
8681
8682 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8683 HOST_SPACE_INTERNAL_CB_SZ,
8684 hdev->internal_cb_pool_virt_addr,
8685 hdev->internal_cb_pool_dma_addr);
8686}
8687
kernel test robotbb34bf72020-07-29 08:03:13 +08008688static int gaudi_ctx_init(struct hl_ctx *ctx)
Ofir Bittona04b7cd2020-07-13 13:36:55 +03008689{
Ofir Bitton8e39e752020-11-12 11:03:32 +02008690 if (ctx->asid == HL_KERNEL_ASID_ID)
8691 return 0;
8692
Ofir Bitton20b75252020-09-30 15:51:10 +03008693 gaudi_mmu_prepare(ctx->hdev, ctx->asid);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008694 return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8695}
Ofir Bitton20b75252020-09-30 15:51:10 +03008696
kernel test robot293744d2020-11-19 12:25:43 +08008697static void gaudi_ctx_fini(struct hl_ctx *ctx)
Ofir Bitton5de406c2020-09-10 10:56:26 +03008698{
Ofir Bitton8e39e752020-11-12 11:03:32 +02008699 if (ctx->asid == HL_KERNEL_ASID_ID)
Ofir Bitton5de406c2020-09-10 10:56:26 +03008700 return;
8701
8702 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
Ofir Bittona04b7cd2020-07-13 13:36:55 +03008703}
8704
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008705static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8706{
8707 return gaudi_cq_assignment[cq_idx];
8708}
8709
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008710static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8711{
8712 return sizeof(struct packet_msg_short) +
8713 sizeof(struct packet_msg_prot) * 2;
8714}
8715
8716static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8717{
8718 return sizeof(struct packet_msg_short) * 4 +
8719 sizeof(struct packet_fence) +
8720 sizeof(struct packet_msg_prot) * 2;
8721}
8722
farah kassabridadf17a2021-05-24 18:09:22 +03008723static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8724{
8725 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8726}
8727
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008728static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
Alon Mizrahi72ab9ca52020-12-02 19:55:30 +02008729 u32 size, bool eb)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008730{
8731 struct hl_cb *cb = (struct hl_cb *) data;
8732 struct packet_msg_short *pkt;
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008733 u32 value, ctl, pkt_size = sizeof(*pkt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008734
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008735 pkt = cb->kernel_address + size;
8736 memset(pkt, 0, pkt_size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008737
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008738 /* Inc by 1, Mode ADD */
8739 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8740 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008741
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008742 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8743 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8744 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008745 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8746 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8747 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8748 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008749
8750 pkt->value = cpu_to_le32(value);
8751 pkt->ctl = cpu_to_le32(ctl);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008752
8753 return size + pkt_size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008754}
8755
8756static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8757 u16 addr)
8758{
8759 u32 ctl, pkt_size = sizeof(*pkt);
8760
8761 memset(pkt, 0, pkt_size);
8762
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008763 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8764 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008765 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8766 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8767 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8768 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008769
8770 pkt->value = cpu_to_le32(value);
8771 pkt->ctl = cpu_to_le32(ctl);
8772
8773 return pkt_size;
8774}
8775
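/*
 * Build the MSG_SHORT packet that arms monitor 'mon_id': the monitor watches
 * the sync-object group containing 'sob_base' (8 SOBs per group) and fires
 * once the SOBs selected by 'sob_mask' reach 'sob_val' (GREATER OR EQUAL
 * mode). Returns 0 if the sob_base/sob_mask combination is invalid.
 */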
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008776static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8777 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8778 u16 sob_val, u16 mon_id)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008779{
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008780 u64 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008781 u32 ctl, value, pkt_size = sizeof(*pkt);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008782 u16 msg_addr_offset;
8783 u8 mask;
8784
8785 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8786 dev_err(hdev->dev,
8787 "sob_base %u (mask %#x) is not valid\n",
8788 sob_base, sob_mask);
8789 return 0;
8790 }
8791
8792 /*
8793 * monitor_base should be the content of the base0 address registers,
8794 * so it will be added to the msg short offsets
8795 */
8796 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8797
8798 msg_addr_offset =
8799 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8800 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008801
8802 memset(pkt, 0, pkt_size);
8803
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008804 /* Monitor config packet: bind the monitor to a sync object */
8805 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008806 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8807 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8808 			0); /* GREATER OR EQUAL */
8809 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008810
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008811 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008812 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8813 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008814 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8815 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8816 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8817 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008818
8819 pkt->value = cpu_to_le32(value);
8820 pkt->ctl = cpu_to_le32(ctl);
8821
8822 return pkt_size;
8823}
8824
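/*
 * Build a FENCE packet on fence ID 2 with target value 1 and decrement 1.
 * In the wait CB assembled below, the queue's CP stalls on this packet until
 * the armed monitor writes its payload to the queue's CP_FENCE2 register.
 */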
8825static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8826{
8827 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8828
8829 memset(pkt, 0, pkt_size);
8830
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008831 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8832 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8833 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008834
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008835 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8836 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8837 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8838 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008839
8840 pkt->cfg = cpu_to_le32(cfg);
8841 pkt->ctl = cpu_to_le32(ctl);
8842
8843 return pkt_size;
8844}
8845
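/*
 * Translate a queue ID to the CFG-space address of that queue's
 * CP_FENCE2_RDATA register, which is used as the monitor payload address in
 * wait CBs. Only DMA 0/1/5, TPC7 and the NIC queues are handled; any other
 * queue ID returns -EINVAL.
 */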
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008846static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008847{
Ofir Bitton5de406c2020-09-10 10:56:26 +03008848 u32 offset, nic_index;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008849
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008850 switch (queue_id) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008851 case GAUDI_QUEUE_ID_DMA_0_0:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008852 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008853 break;
8854 case GAUDI_QUEUE_ID_DMA_0_1:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008855 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008856 break;
8857 case GAUDI_QUEUE_ID_DMA_0_2:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008858 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008859 break;
8860 case GAUDI_QUEUE_ID_DMA_0_3:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008861 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008862 break;
8863 case GAUDI_QUEUE_ID_DMA_1_0:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008864 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008865 break;
8866 case GAUDI_QUEUE_ID_DMA_1_1:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008867 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008868 break;
8869 case GAUDI_QUEUE_ID_DMA_1_2:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008870 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008871 break;
8872 case GAUDI_QUEUE_ID_DMA_1_3:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008873 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008874 break;
8875 case GAUDI_QUEUE_ID_DMA_5_0:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008876 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008877 break;
8878 case GAUDI_QUEUE_ID_DMA_5_1:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008879 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008880 break;
8881 case GAUDI_QUEUE_ID_DMA_5_2:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008882 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008883 break;
8884 case GAUDI_QUEUE_ID_DMA_5_3:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008885 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008886 break;
Ofir Bitton5de406c2020-09-10 10:56:26 +03008887 case GAUDI_QUEUE_ID_TPC_7_0:
8888 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8889 break;
8890 case GAUDI_QUEUE_ID_TPC_7_1:
8891 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8892 break;
8893 case GAUDI_QUEUE_ID_TPC_7_2:
8894 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8895 break;
8896 case GAUDI_QUEUE_ID_TPC_7_3:
8897 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8898 break;
8899 case GAUDI_QUEUE_ID_NIC_0_0:
8900 case GAUDI_QUEUE_ID_NIC_1_0:
8901 case GAUDI_QUEUE_ID_NIC_2_0:
8902 case GAUDI_QUEUE_ID_NIC_3_0:
8903 case GAUDI_QUEUE_ID_NIC_4_0:
8904 case GAUDI_QUEUE_ID_NIC_5_0:
8905 case GAUDI_QUEUE_ID_NIC_6_0:
8906 case GAUDI_QUEUE_ID_NIC_7_0:
8907 case GAUDI_QUEUE_ID_NIC_8_0:
8908 case GAUDI_QUEUE_ID_NIC_9_0:
8909 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8910 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8911 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8912 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8913 break;
8914 case GAUDI_QUEUE_ID_NIC_0_1:
8915 case GAUDI_QUEUE_ID_NIC_1_1:
8916 case GAUDI_QUEUE_ID_NIC_2_1:
8917 case GAUDI_QUEUE_ID_NIC_3_1:
8918 case GAUDI_QUEUE_ID_NIC_4_1:
8919 case GAUDI_QUEUE_ID_NIC_5_1:
8920 case GAUDI_QUEUE_ID_NIC_6_1:
8921 case GAUDI_QUEUE_ID_NIC_7_1:
8922 case GAUDI_QUEUE_ID_NIC_8_1:
8923 case GAUDI_QUEUE_ID_NIC_9_1:
8924 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8925 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8926 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8927 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8928 break;
8929 case GAUDI_QUEUE_ID_NIC_0_2:
8930 case GAUDI_QUEUE_ID_NIC_1_2:
8931 case GAUDI_QUEUE_ID_NIC_2_2:
8932 case GAUDI_QUEUE_ID_NIC_3_2:
8933 case GAUDI_QUEUE_ID_NIC_4_2:
8934 case GAUDI_QUEUE_ID_NIC_5_2:
8935 case GAUDI_QUEUE_ID_NIC_6_2:
8936 case GAUDI_QUEUE_ID_NIC_7_2:
8937 case GAUDI_QUEUE_ID_NIC_8_2:
8938 case GAUDI_QUEUE_ID_NIC_9_2:
8939 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8940 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8941 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8942 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8943 break;
8944 case GAUDI_QUEUE_ID_NIC_0_3:
8945 case GAUDI_QUEUE_ID_NIC_1_3:
8946 case GAUDI_QUEUE_ID_NIC_2_3:
8947 case GAUDI_QUEUE_ID_NIC_3_3:
8948 case GAUDI_QUEUE_ID_NIC_4_3:
8949 case GAUDI_QUEUE_ID_NIC_5_3:
8950 case GAUDI_QUEUE_ID_NIC_6_3:
8951 case GAUDI_QUEUE_ID_NIC_7_3:
8952 case GAUDI_QUEUE_ID_NIC_8_3:
8953 case GAUDI_QUEUE_ID_NIC_9_3:
8954 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8955 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8956 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8957 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8958 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008959 default:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008960 return -EINVAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008961 }
8962
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008963 *addr = CFG_BASE + offset;
8964
8965 return 0;
8966}
8967
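/*
 * Emit the three monitor configuration MSG_SHORT packets for monitor
 * 'mon_id': the low and high halves of the payload address (the fence
 * register to write) and the payload data itself (1). Returns the total
 * size added to the buffer.
 */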
8968static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8969{
8970 u64 monitor_base;
8971 u32 size = 0;
8972 u16 msg_addr_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008973
8974 /*
8975 * monitor_base should be the content of the base0 address registers,
8976 * so it will be added to the msg short offsets
8977 */
8978 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8979
8980 /* First monitor config packet: low address of the sync */
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008981 msg_addr_offset =
8982 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8983 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008984
8985 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8986 msg_addr_offset);
8987
8988 /* Second monitor config packet: high address of the sync */
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008989 msg_addr_offset =
8990 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8991 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008992
8993 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8994 msg_addr_offset);
8995
8996 /*
8997 * Third monitor config packet: the payload, i.e. what to write when the
8998 * sync triggers
8999 */
Ofir Bitton2992c1d2020-09-10 09:40:35 +03009000 msg_addr_offset =
9001 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9002 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009003
9004 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9005
Ofir Bitton2992c1d2020-09-10 09:40:35 +03009006 return size;
9007}
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009008
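/*
 * Assemble a complete "wait" CB: three packets configuring the monitor
 * payload, one packet arming the monitor on the requested sync objects, and
 * a fence packet that stalls the queue until the monitor fires.
 * Returns the new CB size, or 0 if the queue ID has no fence address.
 */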
Oded Gabbay3c681572020-11-02 21:10:39 +02009009static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9010 struct hl_gen_wait_properties *prop)
Ofir Bitton2992c1d2020-09-10 09:40:35 +03009011{
9012 struct hl_cb *cb = (struct hl_cb *) prop->data;
9013 void *buf = cb->kernel_address;
9014 u64 fence_addr = 0;
9015 u32 size = prop->size;
9016
9017 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9018 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9019 prop->q_idx);
9020 return 0;
9021 }
9022
9023 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9024 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9025 prop->sob_mask, prop->sob_val, prop->mon_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009026 size += gaudi_add_fence_pkt(buf + size);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03009027
9028 return size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009029}
9030
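/*
 * Reset a sync object to 0 by scheduling a register memset on the SOB's
 * queue, then re-initialize its refcount.
 */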
9031static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9032{
9033 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
Ofir Bitton423815b2021-01-05 09:04:07 +02009034 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009035
9036 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9037 hw_sob->sob_id);
9038
Ofir Bitton423815b2021-01-05 09:04:07 +02009039 rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
9040 CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9041 hw_sob->sob_id * 4, 1, 0);
9042 if (rc)
9043 		dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009044
9045 kref_init(&hw_sob->kref);
9046}
9047
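/*
 * If the boot firmware left HL_POWER9_HOST_MAGIC in the non-reset scratchpad
 * register, the host is a POWER9 machine that gets the full 64-bit DMA mask;
 * otherwise use the default 48-bit mask.
 */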
9048static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9049{
9050 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9051 HL_POWER9_HOST_MAGIC) {
9052 hdev->power9_64bit_dma_enable = 1;
9053 hdev->dma_mask = 64;
9054 } else {
9055 hdev->power9_64bit_dma_enable = 0;
9056 hdev->dma_mask = 48;
9057 }
9058}
9059
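/*
 * Read the device's free-running timestamp counter: the upper 32 bits from
 * CNTCVU and the lower 32 bits from CNTCVL, combined into one 64-bit value.
 */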
9060static u64 gaudi_get_device_time(struct hl_device *hdev)
9061{
9062 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9063
9064 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9065}
9066
Ofir Bittond00697f2021-01-05 12:55:06 +02009067static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
Oded Gabbay6df50d22021-02-05 16:04:34 +02009068 u32 *block_size, u32 *block_id)
Ofir Bittond00697f2021-01-05 12:55:06 +02009069{
9070 return -EPERM;
9071}
9072
9073static int gaudi_block_mmap(struct hl_device *hdev,
9074 struct vm_area_struct *vma,
9075 u32 block_id, u32 block_size)
9076{
9077 return -EPERM;
9078}
9079
Oded Gabbay28bcf1f2021-02-01 21:23:43 +02009080static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9081{
Koby Elbaze591a492021-05-12 18:05:46 +03009082 struct cpu_dyn_regs *dyn_regs =
9083 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Koby Elbaz81217362021-05-03 23:03:15 +03009084 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9085 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Ofir Bitton5bc691d2021-05-25 22:09:13 +03009086 le32_to_cpu(dyn_regs->gic_host_ints_irq);
Koby Elbaz81217362021-05-03 23:03:15 +03009087
Ofir Bitton7d5ba002021-06-07 15:22:56 +03009088 WREG32(irq_handler_offset,
9089 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
Oded Gabbay28bcf1f2021-02-01 21:23:43 +02009090}
9091
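/*
 * Translate the generic HL_GAUDI_*_PLL index used by common code into the
 * firmware's PLL numbering. Unknown indices return -EINVAL.
 */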
Bharat Jauhari285c0fa2021-03-25 18:15:40 +02009092static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9093{
9094 switch (pll_idx) {
9095 case HL_GAUDI_CPU_PLL: return CPU_PLL;
9096 case HL_GAUDI_PCI_PLL: return PCI_PLL;
9097 case HL_GAUDI_NIC_PLL: return NIC_PLL;
9098 case HL_GAUDI_DMA_PLL: return DMA_PLL;
9099 case HL_GAUDI_MESH_PLL: return MESH_PLL;
9100 case HL_GAUDI_MME_PLL: return MME_PLL;
9101 case HL_GAUDI_TPC_PLL: return TPC_PLL;
9102 case HL_GAUDI_IF_PLL: return IF_PLL;
9103 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9104 case HL_GAUDI_HBM_PLL: return HBM_PLL;
9105 default: return -EINVAL;
9106 }
9107}
9108
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009109static int gaudi_add_sync_to_engine_map_entry(
9110 struct hl_sync_to_engine_map *map, u32 reg_value,
9111 enum hl_sync_engine_type engine_type, u32 engine_id)
9112{
9113 struct hl_sync_to_engine_map_entry *entry;
9114
9115 	/* The register value holds a partial address of the sync object
9116 	 * and is used as a unique identifier. To derive it, we need to
9117 	 * strip the CFG base bits from the value.
9118 	 */
9119 if (reg_value == 0 || reg_value == 0xffffffff)
9120 return 0;
9121 reg_value -= (u32)CFG_BASE;
9122
9123 /* create a new hash entry */
9124 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9125 if (!entry)
9126 return -ENOMEM;
9127 entry->engine_type = engine_type;
9128 entry->engine_id = engine_id;
9129 entry->sync_id = reg_value;
9130 hash_add(map->tb, &entry->node, reg_value);
9131
9132 return 0;
9133}
9134
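/*
 * Build the sync-object-to-engine map used by the state dump: for every TPC,
 * MME and DMA engine, read the engine's configured sync object register
 * (with clock gating disabled where required) and add a hash entry keyed by
 * that sync object ID.
 */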
Yuri Nudelman938b7932021-06-06 10:28:51 +03009135static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9136 struct hl_sync_to_engine_map *map)
9137{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009138 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9139 struct gaudi_device *gaudi = hdev->asic_specific;
9140 int i, j, rc;
9141 u32 reg_value;
9142
9143 /* Iterate over TPC engines */
9144 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9145 		/* TPC registers must be accessed with clock gating disabled */
9146 mutex_lock(&gaudi->clk_gate_mutex);
9147 hdev->asic_funcs->disable_clock_gating(hdev);
9148
9149 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9150 sds->props[SP_NEXT_TPC] * i);
9151
9152 		/* We can re-enable clock gating */
9153 hdev->asic_funcs->set_clock_gating(hdev);
9154 mutex_unlock(&gaudi->clk_gate_mutex);
9155
9156 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9157 ENGINE_TPC, i);
9158 if (rc)
9159 goto free_sync_to_engine_map;
9160 }
9161
9162 /* Iterate over MME engines */
9163 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9164 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9165 			/* MME registers must be accessed with clock gating
9166 * disabled
9167 */
9168 mutex_lock(&gaudi->clk_gate_mutex);
9169 hdev->asic_funcs->disable_clock_gating(hdev);
9170
9171 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9172 sds->props[SP_NEXT_MME] * i +
9173 j * sizeof(u32));
9174
9175 			/* We can re-enable clock gating */
9176 hdev->asic_funcs->set_clock_gating(hdev);
9177 mutex_unlock(&gaudi->clk_gate_mutex);
9178
9179 rc = gaudi_add_sync_to_engine_map_entry(
9180 map, reg_value, ENGINE_MME,
9181 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9182 if (rc)
9183 goto free_sync_to_engine_map;
9184 }
9185 }
9186
9187 /* Iterate over DMA engines */
9188 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9189 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9190 sds->props[SP_DMA_QUEUES_OFFSET] * i);
9191 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9192 ENGINE_DMA, i);
9193 if (rc)
9194 goto free_sync_to_engine_map;
9195 }
9196
Yuri Nudelman938b7932021-06-06 10:28:51 +03009197 return 0;
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009198
9199free_sync_to_engine_map:
9200 hl_state_dump_free_sync_to_engine_map(map);
9201
9202 return rc;
Yuri Nudelman938b7932021-06-06 10:28:51 +03009203}
9204
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009205static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9206{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009207 return FIELD_GET(
9208 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9209 mon->status);
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009210}
9211
9212static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9213 struct hl_device *hdev,
9214 struct hl_mon_state_dump *mon)
9215{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009216 const char *name;
9217 char scratch_buf1[BIN_REG_STRING_SIZE],
9218 scratch_buf2[BIN_REG_STRING_SIZE];
9219
9220 name = hl_state_dump_get_monitor_name(hdev, mon);
9221 if (!name)
9222 name = "";
9223
9224 return hl_snprintf_resize(
9225 buf, size, offset,
9226 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
9227 mon->id, name,
9228 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9229 mon->arm_data),
9230 hl_format_as_binary(
9231 scratch_buf1, sizeof(scratch_buf1),
9232 FIELD_GET(
9233 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9234 mon->arm_data)),
9235 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9236 mon->arm_data),
9237 mon->wr_data,
9238 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9239 hl_format_as_binary(
9240 scratch_buf2, sizeof(scratch_buf2),
9241 FIELD_GET(
9242 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9243 mon->status)));
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009244}
9245
9246
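/*
 * Dump the fence state of a single engine: read the CP status and fence
 * counter registers for all of the engine's queues, and for every stream
 * with a fence in progress print the counter/RDATA addresses and values
 * into the resizable state-dump buffer.
 */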
9247static int gaudi_print_fences_single_engine(
9248 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9249 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9250 size_t *size, size_t *offset)
9251{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009252 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9253 int rc = -ENOMEM, i;
9254 u32 *statuses, *fences;
9255
9256 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9257 sizeof(*statuses), GFP_KERNEL);
9258 if (!statuses)
9259 goto out;
9260
9261 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9262 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9263 sizeof(*fences), GFP_KERNEL);
9264 if (!fences)
9265 goto free_status;
9266
9267 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9268 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9269
9270 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9271 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9272 fences[i] = RREG32(base_offset + i * sizeof(u32));
9273
9274 /* The actual print */
9275 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9276 u32 fence_id;
9277 u64 fence_cnt, fence_rdata;
9278 const char *engine_name;
9279
9280 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9281 statuses[i]))
9282 continue;
9283
9284 fence_id =
9285 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9286 fence_cnt = base_offset + CFG_BASE +
9287 sizeof(u32) *
9288 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9289 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9290 sds->props[SP_FENCE0_RDATA_OFFSET];
9291 engine_name = hl_sync_engine_to_string(engine_type);
9292
9293 rc = hl_snprintf_resize(
9294 buf, size, offset,
9295 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9296 engine_name, engine_id,
9297 i, fence_id,
9298 fence_cnt, engine_name, engine_id, fence_id, i,
9299 fence_rdata, engine_name, engine_id, fence_id, i,
9300 fences[fence_id],
9301 statuses[i]);
9302 if (rc)
9303 goto free_fences;
9304 }
9305
9306 rc = 0;
9307
9308free_fences:
9309 kfree(fences);
9310free_status:
9311 kfree(statuses);
9312out:
9313 return rc;
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009314}
9315
Yuri Nudelman938b7932021-06-06 10:28:51 +03009316
9317static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009318 .monitor_valid = gaudi_monitor_valid,
9319 .print_single_monitor = gaudi_print_single_monitor,
Yuri Nudelman938b7932021-06-06 10:28:51 +03009320 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009321 .print_fences_single_engine = gaudi_print_fences_single_engine,
Yuri Nudelman938b7932021-06-06 10:28:51 +03009322};
9323
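/*
 * Populate the state dump specs: hash tables translating sync object and
 * monitor IDs to names, the Gaudi-specific property table, the sync manager
 * names and the dump callbacks defined above.
 */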
9324static void gaudi_state_dump_init(struct hl_device *hdev)
9325{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009326 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9327 int i;
9328
9329 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9330 hash_add(sds->so_id_to_str_tb,
9331 &gaudi_so_id_to_str[i].node,
9332 gaudi_so_id_to_str[i].id);
9333
9334 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9335 hash_add(sds->monitor_id_to_str_tb,
9336 &gaudi_monitor_id_to_str[i].node,
9337 gaudi_monitor_id_to_str[i].id);
9338
9339 sds->props = gaudi_state_dump_specs_props;
9340
9341 sds->sync_namager_names = gaudi_sync_manager_names;
9342
9343 sds->funcs = gaudi_state_dump_funcs;
Yuri Nudelman938b7932021-06-06 10:28:51 +03009344}
9345
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009346static const struct hl_asic_funcs gaudi_funcs = {
9347 .early_init = gaudi_early_init,
9348 .early_fini = gaudi_early_fini,
9349 .late_init = gaudi_late_init,
9350 .late_fini = gaudi_late_fini,
9351 .sw_init = gaudi_sw_init,
9352 .sw_fini = gaudi_sw_fini,
9353 .hw_init = gaudi_hw_init,
9354 .hw_fini = gaudi_hw_fini,
9355 .halt_engines = gaudi_halt_engines,
9356 .suspend = gaudi_suspend,
9357 .resume = gaudi_resume,
Zvika Yehudai1ee8e2b2021-07-06 13:50:32 +03009358 .mmap = gaudi_mmap,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009359 .ring_doorbell = gaudi_ring_doorbell,
9360 .pqe_write = gaudi_pqe_write,
9361 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9362 .asic_dma_free_coherent = gaudi_dma_free_coherent,
farah kassabri03df1362020-05-06 11:17:38 +03009363 .scrub_device_mem = gaudi_scrub_device_mem,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009364 .get_int_queue_base = gaudi_get_int_queue_base,
9365 .test_queues = gaudi_test_queues,
9366 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9367 .asic_dma_pool_free = gaudi_dma_pool_free,
9368 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9369 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9370 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9371 .cs_parser = gaudi_cs_parser,
9372 .asic_dma_map_sg = gaudi_dma_map_sg,
9373 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9374 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9375 .update_eq_ci = gaudi_update_eq_ci,
9376 .context_switch = gaudi_context_switch,
9377 .restore_phase_topology = gaudi_restore_phase_topology,
9378 .debugfs_read32 = gaudi_debugfs_read32,
9379 .debugfs_write32 = gaudi_debugfs_write32,
9380 .debugfs_read64 = gaudi_debugfs_read64,
9381 .debugfs_write64 = gaudi_debugfs_write64,
Oded Gabbay639781d2021-04-02 01:43:18 +03009382 .debugfs_read_dma = gaudi_debugfs_read_dma,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03009383 .add_device_attr = gaudi_add_device_attr,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009384 .handle_eqe = gaudi_handle_eqe,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03009385 .set_pll_profile = gaudi_set_pll_profile,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009386 .get_events_stat = gaudi_get_events_stat,
9387 .read_pte = gaudi_read_pte,
9388 .write_pte = gaudi_write_pte,
9389 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9390 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9391 .send_heartbeat = gaudi_send_heartbeat,
Oded Gabbaye38bfd32020-07-03 20:46:12 +03009392 .set_clock_gating = gaudi_set_clock_gating,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009393 .disable_clock_gating = gaudi_disable_clock_gating,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03009394 .debug_coresight = gaudi_debug_coresight,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009395 .is_device_idle = gaudi_is_device_idle,
9396 .soft_reset_late_init = gaudi_soft_reset_late_init,
9397 .hw_queues_lock = gaudi_hw_queues_lock,
9398 .hw_queues_unlock = gaudi_hw_queues_unlock,
9399 .get_pci_id = gaudi_get_pci_id,
9400 .get_eeprom_data = gaudi_get_eeprom_data,
9401 .send_cpu_message = gaudi_send_cpu_message,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009402 .pci_bars_map = gaudi_pci_bars_map,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009403 .init_iatu = gaudi_init_iatu,
9404 .rreg = hl_rreg,
9405 .wreg = hl_wreg,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03009406 .halt_coresight = gaudi_halt_coresight,
Ofir Bittona04b7cd2020-07-13 13:36:55 +03009407 .ctx_init = gaudi_ctx_init,
Ofir Bitton5de406c2020-09-10 10:56:26 +03009408 .ctx_fini = gaudi_ctx_fini,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03009409 .get_clk_rate = gaudi_get_clk_rate,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009410 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009411 .load_firmware_to_device = gaudi_load_firmware_to_device,
9412 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009413 .get_signal_cb_size = gaudi_get_signal_cb_size,
9414 .get_wait_cb_size = gaudi_get_wait_cb_size,
9415 .gen_signal_cb = gaudi_gen_signal_cb,
9416 .gen_wait_cb = gaudi_gen_wait_cb,
9417 .reset_sob = gaudi_reset_sob,
Ofir Bitton5fe1c172020-09-10 10:10:55 +03009418 .reset_sob_group = gaudi_reset_sob_group,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009419 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
Ofir Bitton5fe1c172020-09-10 10:10:55 +03009420 .get_device_time = gaudi_get_device_time,
9421 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
Moti Haimovskib19dc672020-11-18 20:15:29 +02009422 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
farah kassabri89473a12021-01-12 17:24:00 +02009423 .scramble_addr = hl_mmu_scramble_addr,
9424 .descramble_addr = hl_mmu_descramble_addr,
Ofir Bittond00697f2021-01-05 12:55:06 +02009425 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9426 .get_hw_block_id = gaudi_get_hw_block_id,
Oded Gabbay28bcf1f2021-02-01 21:23:43 +02009427 .hw_block_mmap = gaudi_block_mmap,
Bharat Jauhari285c0fa2021-03-25 18:15:40 +02009428 .enable_events_from_fw = gaudi_enable_events_from_fw,
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03009429 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9430 .init_firmware_loader = gaudi_init_firmware_loader,
Yuri Nudelman938b7932021-06-06 10:28:51 +03009431 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
farah kassabridadf17a2021-05-24 18:09:22 +03009432 .state_dump_init = gaudi_state_dump_init,
9433 .get_sob_addr = gaudi_get_sob_addr
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009434};
9435
9436/**
9437 * gaudi_set_asic_funcs - set GAUDI function pointers
9438 *
Lee Jonesf7d227c2020-07-01 09:58:42 +01009439 * @hdev: pointer to hl_device structure
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009440 *
9441 */
9442void gaudi_set_asic_funcs(struct hl_device *hdev)
9443{
9444 hdev->asic_funcs = &gaudi_funcs;
9445}