// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

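/*
 * Whitelist of the QMAN packet opcodes the driver knows how to handle;
 * any other opcode is treated as an invalid packet.
 */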
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

enum gaudi_sm_sei_cause {
	GAUDI_SM_SEI_SO_OVERFLOW,
	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
	GAUDI_SM_SEI_AXI_RESPONSE_ERR
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};

static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N", "SYNC_MGR_W_N", "SYNC_MGR_E_S", "SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_schedule_register_memset(struct hl_device *hdev,
		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
			struct hl_gen_wait_properties *prop);

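/*
 * Map a queue ID to its role in collective (reduction) operations:
 * external queues act as collective masters, the DMA5, TPC7 and NIC
 * queues act as collective slaves, and any other queue does not take
 * part in collective waits.
 */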
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

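/*
 * Default power limits depend on the card type reported by the CPU-CP
 * firmware: PMC cards and PCI cards use different maximum and DC power
 * defaults.
 */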
static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
		prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_msix_interrupt = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

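/*
 * Re-target inbound PCI region 2 (BAR 4) to a new HBM address so the host
 * can reach that part of device memory through the BAR. Returns the
 * previous BAR base so the caller can restore it, or U64_MAX on failure
 * or when the firmware owns the iATU configuration.
 */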
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

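/*
 * Program the PCIe iATU: three inbound regions (SRAM+CFG, SPI flash, HBM)
 * and one outbound region covering the host physical address space.
 * Skipped entirely when the firmware has already configured the iATU.
 */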
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
					mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1,
					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

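		/*
		 * PLL output = ref_clk * (NF + 1) / ((NR + 1) * (OD + 1)).
		 * DIV_SEL selects whether the reported frequency is the raw
		 * reference clock, the PLL output, or either of them divided
		 * by (div_fctr + 1).
		 */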
		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

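/*
 * Build a single LIN_DMA packet that copies the TPC kernel binary from
 * host memory to the user SRAM region, submit it on QMAN0 (DMA channel 0),
 * and then run the kernel on every TPC engine.
 */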
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

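/*
 * Assign the SOBs of the stream's current SOB group to the collective
 * slave queues: one SOB per NIC queue and a shared SOB for the reduction
 * engine queue (DMA5/TPC7).
 */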
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	u64 base_addr;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob_group->base_sob_id * 4;
	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
	if (rc)
		dev_err(hdev->dev,
			"failed resetting sob group - sob base %u, count %u",
			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

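/*
 * Initialize a collective-wait CS: the first (master) job waits on the
 * SOB group signaled by all slaves, while each slave job waits on the
 * user's signal SOB and then signals its collective SOB. Also advances
 * the per-stream SOB group value and handles group index wraparound.
 */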
1235static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1236{
Ofir Bitton5de406c2020-09-10 10:56:26 +03001237 struct hl_cs_compl *signal_cs_cmpl =
1238 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1239 struct hl_cs_compl *cs_cmpl =
1240 container_of(cs->fence, struct hl_cs_compl, base_fence);
1241 struct gaudi_collective_properties *cprop;
1242 u32 stream, queue_id, sob_group_offset;
1243 struct gaudi_device *gaudi;
1244 struct hl_device *hdev;
1245 struct hl_cs_job *job;
1246 struct hl_ctx *ctx;
Ofir Bitton5fe1c172020-09-10 10:10:55 +03001247
Ofir Bitton5de406c2020-09-10 10:56:26 +03001248 ctx = cs->ctx;
1249 hdev = ctx->hdev;
1250 gaudi = hdev->asic_specific;
1251 cprop = &gaudi->collective_props;
1252
1253 /* copy the SOB id and value of the signal CS */
1254 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1255 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1256
1257 /* Calculate the stream from collective master queue (1st job) */
1258 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1259 stream = job->hw_queue_id % 4;
1260 sob_group_offset =
1261 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1262
1263 list_for_each_entry(job, &cs->job_list, cs_node) {
1264 queue_id = job->hw_queue_id;
1265
1266 if (hdev->kernel_queues[queue_id].collective_mode ==
1267 HL_COLLECTIVE_MASTER)
1268 gaudi_collective_master_init_job(hdev, job, stream,
1269 sob_group_offset);
1270 else
1271 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1272 }
1273
1274 cs_cmpl->sob_group = sob_group_offset;
1275
1276 /* Handle sob group kref and wraparound */
1277 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1278 cprop->next_sob_group_val[stream]++;
1279
1280 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1281 /*
1282 * Decrement as we reached the max value.
1283 * The release function won't be called here as we've
1284 * just incremented the refcount.
1285 */
1286 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1287 gaudi_sob_group_reset_error);
1288 cprop->next_sob_group_val[stream] = 1;
1289 /* only two SOBs are currently in use */
1290 cprop->curr_sob_group_idx[stream] =
1291 (cprop->curr_sob_group_idx[stream] + 1) &
1292 (HL_RSVD_SOBS - 1);
1293
1294 gaudi_collective_map_sobs(hdev, stream);
1295
1296 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1297 cprop->curr_sob_group_idx[stream], stream);
1298 }
1299
1300 /* Increment kref since all slave queues are now waiting on it */
1301 kref_get(&cs_cmpl->hw_sob->kref);
1302 /*
1303 * Must put the signal fence after the SOB refcnt increment so
1304 * the SOB refcnt won't turn 0 and reset the SOB before the
1305 * wait CS was submitted.
1306 */
1307 mb();
1308 hl_fence_put(cs->signal_fence);
1309 cs->signal_fence = NULL;
1310}
1311
1312static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1313 struct hl_ctx *ctx, struct hl_cs *cs,
1314 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1315{
1316 struct hw_queue_properties *hw_queue_prop;
1317 struct hl_cs_counters_atomic *cntr;
1318 struct hl_cs_job *job;
1319 struct hl_cb *cb;
1320 u32 cb_size;
1321 bool patched_cb;
1322
1323 cntr = &hdev->aggregated_cs_counters;
1324
1325 if (mode == HL_COLLECTIVE_MASTER) {
1326 /* CB size of collective master queue contains
1327 * 4 msg short packets for monitor 1 configuration
1328 * 1 fence packet
1329 * 4 msg short packets for monitor 2 configuration
1330 * 1 fence packet
1331 * 2 msg prot packets for completion and MSI-X
1332 */
1333 cb_size = sizeof(struct packet_msg_short) * 8 +
1334 sizeof(struct packet_fence) * 2 +
1335 sizeof(struct packet_msg_prot) * 2;
1336 patched_cb = true;
1337 } else {
1338 /* CB size of collective slave queues contains
1339 * 4 msg short packets for monitor configuration
1340 * 1 fence packet
1341 * 1 additional msg short packet for sob signal
1342 */
1343 cb_size = sizeof(struct packet_msg_short) * 5 +
1344 sizeof(struct packet_fence);
1345 patched_cb = false;
1346 }
1347
1348 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1349 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1350 if (!job) {
farah kassabrie7536432020-10-12 14:30:26 +03001351 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
Ofir Bitton5de406c2020-09-10 10:56:26 +03001352 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1353 dev_err(hdev->dev, "Failed to allocate a new job\n");
1354 return -ENOMEM;
1355 }
1356
1357 /* Allocate internal mapped CB for non patched CBs */
1358 cb = hl_cb_kernel_create(hdev, cb_size,
1359 hdev->mmu_enable && !patched_cb);
1360 if (!cb) {
farah kassabrie7536432020-10-12 14:30:26 +03001361 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
Ofir Bitton5de406c2020-09-10 10:56:26 +03001362 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1363 kfree(job);
1364 return -EFAULT;
1365 }
1366
1367 job->id = 0;
1368 job->cs = cs;
1369 job->user_cb = cb;
Tomer Tayarf07486742020-08-02 22:51:31 +03001370 atomic_inc(&job->user_cb->cs_cnt);
Ofir Bitton5de406c2020-09-10 10:56:26 +03001371 job->user_cb_size = cb_size;
1372 job->hw_queue_id = queue_id;
1373
1374 /*
1375 * No need in parsing, user CB is the patched CB.
1376 * We call hl_cb_destroy() out of two reasons - we don't need
1377 * the CB in the CB idr anymore and to decrement its refcount as
1378 * it was incremented inside hl_cb_kernel_create().
1379 */
1380 if (patched_cb)
1381 job->patched_cb = job->user_cb;
1382 else
1383 job->patched_cb = NULL;
1384
1385 job->job_cb_size = job->user_cb_size;
1386 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1387
1388 /* increment refcount as for external queues we get completion */
1389 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1390 cs_get(cs);
1391
1392 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1393
1394 list_add_tail(&job->cs_node, &cs->job_list);
1395
1396 hl_debugfs_add_job(hdev, job);
1397
1398 return 0;
Ofir Bitton5fe1c172020-09-10 10:10:55 +03001399}
1400
1401static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1402 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1403 u32 collective_engine_id)
1404{
Ofir Bitton5de406c2020-09-10 10:56:26 +03001405 struct gaudi_device *gaudi = hdev->asic_specific;
1406 struct hw_queue_properties *hw_queue_prop;
1407 u32 queue_id, collective_queue, num_jobs;
1408 u32 stream, nic_queue, nic_idx = 0;
1409 bool skip;
Ofir Bitton266cdfa2020-12-22 15:56:12 +02001410 int i, rc = 0;
Ofir Bitton5de406c2020-09-10 10:56:26 +03001411
1412 /* Verify wait queue id is configured as master */
1413 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1414 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1415 dev_err(hdev->dev,
1416 "Queue %d is not configured as collective master\n",
1417 wait_queue_id);
1418 return -EINVAL;
1419 }
1420
1421 /* Verify engine id is supported */
1422 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1423 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1424 dev_err(hdev->dev,
1425 "Collective wait does not support engine %u\n",
1426 collective_engine_id);
1427 return -EINVAL;
1428 }
1429
1430 stream = wait_queue_id % 4;
1431
1432 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1433 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
Ofir Bitton5de406c2020-09-10 10:56:26 +03001434 else
Ofir Bitton71a984f2020-10-19 16:52:00 +03001435 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
Ofir Bitton5de406c2020-09-10 10:56:26 +03001436
1437 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1438 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1439
1440	/* The first job goes to the collective master queue; it will wait for
1441	 * the collective slave queues to finish execution.
1442	 * The synchronization is done using two monitors:
1443	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1444	 * reduction engine (DMA5/TPC7).
1445	 *
1446	 * The rest of the jobs go to the collective slave queues, which will
1447	 * all wait for the user to signal SOB 'cs_cmpl->sob_val'.
1448 */
1449 for (i = 0 ; i < num_jobs ; i++) {
1450 if (i == 0) {
1451 queue_id = wait_queue_id;
1452 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1453 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1454 } else {
1455 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1456 if (gaudi->hw_cap_initialized &
1457 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1458 skip = false;
1459 else
1460 skip = true;
1461
1462 queue_id = nic_queue;
1463 nic_queue += 4;
1464 nic_idx++;
1465
1466 if (skip)
1467 continue;
1468 } else {
1469 queue_id = collective_queue;
1470 }
1471
1472 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1473 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1474 }
1475
1476 if (rc)
1477 return rc;
1478 }
1479
1480 return rc;
Ofir Bitton5fe1c172020-09-10 10:10:55 +03001481}
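/*
 * For illustration, assuming NUMBER_OF_SOBS_IN_GRP covers the ten NIC
 * engines plus the reduction engine, the loop above distributes the jobs
 * for a given stream roughly as follows:
 *
 *	i == 0		-> wait_queue_id (HL_COLLECTIVE_MASTER)
 *	i == 1..10	-> NIC queues of that stream (HL_COLLECTIVE_SLAVE),
 *			   skipping NICs that are not initialized
 *	last i		-> DMA5 or TPC7 queue of that stream (HL_COLLECTIVE_SLAVE)
 *
 * Disabled NICs only shrink the number of slave jobs; the master job and
 * the reduction-engine job are still created.
 */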
1482
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001483static int gaudi_late_init(struct hl_device *hdev)
1484{
1485 struct gaudi_device *gaudi = hdev->asic_specific;
1486 int rc;
1487
Oded Gabbay2f553422020-08-15 16:28:10 +03001488 rc = gaudi->cpucp_info_get(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001489 if (rc) {
Oded Gabbay2f553422020-08-15 16:28:10 +03001490 dev_err(hdev->dev, "Failed to get cpucp info\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001491 return rc;
1492 }
1493
Oded Gabbay3c681572020-11-02 21:10:39 +02001494 if ((hdev->card_type == cpucp_card_type_pci) &&
1495 (hdev->nic_ports_mask & 0x3)) {
1496 dev_info(hdev->dev,
1497 "PCI card detected, only 8 ports are enabled\n");
1498 hdev->nic_ports_mask &= ~0x3;
1499
1500 /* Stop and disable unused NIC QMANs */
1501 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1502 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1503 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1504
1505 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1506 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1507 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1508
1509 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1510 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1511
1512 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1513 }
1514
Oded Gabbay2f553422020-08-15 16:28:10 +03001515 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001516 if (rc) {
1517 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1518 return rc;
1519 }
1520
Ofir Bitton1cbca892020-10-05 11:36:00 +03001521 rc = gaudi_fetch_psoc_frequency(hdev);
1522 if (rc) {
1523 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1524 goto disable_pci_access;
1525 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001526
1527 rc = gaudi_mmu_clear_pgt_range(hdev);
1528 if (rc) {
1529 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1530 goto disable_pci_access;
1531 }
1532
1533 rc = gaudi_init_tpc_mem(hdev);
1534 if (rc) {
1535 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1536 goto disable_pci_access;
1537 }
1538
Ofir Bitton5de406c2020-09-10 10:56:26 +03001539 rc = gaudi_collective_init(hdev);
1540 if (rc) {
1541 dev_err(hdev->dev, "Failed to init collective\n");
1542 goto disable_pci_access;
1543 }
1544
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001545 return 0;
1546
1547disable_pci_access:
Oded Gabbay2f553422020-08-15 16:28:10 +03001548 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001549
1550 return rc;
1551}
1552
1553static void gaudi_late_fini(struct hl_device *hdev)
1554{
1555 const struct hwmon_channel_info **channel_info_arr;
1556 int i = 0;
1557
1558 if (!hdev->hl_chip_info->info)
1559 return;
1560
1561 channel_info_arr = hdev->hl_chip_info->info;
1562
1563 while (channel_info_arr[i]) {
1564 kfree(channel_info_arr[i]->config);
1565 kfree(channel_info_arr[i]);
1566 i++;
1567 }
1568
1569 kfree(channel_info_arr);
1570
1571 hdev->hl_chip_info->info = NULL;
1572}
1573
1574static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1575{
1576 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1577 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1578 int i, j, rc = 0;
1579
1580 /*
1581	 * The device CPU works with 40-bit addresses, and bit 39 must be set
1582	 * to '1' when accessing the host.
1583	 * Bits 49:39 of the full host address are saved for a later
1584	 * configuration of the HW to perform the extension to 50 bits.
1585	 * Because there is a single HW register that holds the extension bits,
1586	 * these bits must be identical across the entire allocated range.
1587 */
1588
1589 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1590 virt_addr_arr[i] =
1591 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1592 HL_CPU_ACCESSIBLE_MEM_SIZE,
1593 &dma_addr_arr[i],
1594 GFP_KERNEL | __GFP_ZERO);
1595 if (!virt_addr_arr[i]) {
1596 rc = -ENOMEM;
1597 goto free_dma_mem_arr;
1598 }
1599
1600 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1601 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1602 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1603 break;
1604 }
1605
1606 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1607 dev_err(hdev->dev,
1608 "MSB of CPU accessible DMA memory are not identical in all range\n");
1609 rc = -EFAULT;
1610 goto free_dma_mem_arr;
1611 }
1612
1613 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1614 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1615 hdev->cpu_pci_msb_addr =
1616 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1617
Ohad Sharabi4cb45082021-05-20 09:09:03 +03001618 if (!hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03001619 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001620
1621free_dma_mem_arr:
1622 for (j = 0 ; j < i ; j++)
1623 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1624 HL_CPU_ACCESSIBLE_MEM_SIZE,
1625 virt_addr_arr[j],
1626 dma_addr_arr[j]);
1627
1628 return rc;
1629}
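/*
 * The retry loop above accepts an allocation only if its first and last
 * byte share the same upper address bits. Assuming GAUDI_CPU_PCI_MSB_ADDR()
 * extracts bits 49:39 of a host address, the check is equivalent to:
 *
 *	u64 start = dma_addr_arr[i];
 *	u64 end = start + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
 *	bool ok = (start & GENMASK_ULL(49, 39)) == (end & GENMASK_ULL(49, 39));
 *
 * Only such a range can be described by the single HW extension register
 * mentioned in the comment at the top of the function.
 */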
1630
1631static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1632{
1633 struct gaudi_device *gaudi = hdev->asic_specific;
1634 struct gaudi_internal_qman_info *q;
1635 u32 i;
1636
1637 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1638 q = &gaudi->internal_qmans[i];
1639 if (!q->pq_kernel_addr)
1640 continue;
1641 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1642 q->pq_kernel_addr,
1643 q->pq_dma_addr);
1644 }
1645}
1646
1647static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1648{
1649 struct gaudi_device *gaudi = hdev->asic_specific;
1650 struct gaudi_internal_qman_info *q;
1651 int rc, i;
1652
1653 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1654 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1655 continue;
1656
1657 q = &gaudi->internal_qmans[i];
1658
1659 switch (i) {
Ofir Bitton0940cab2020-08-31 08:52:56 +03001660 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001661 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1662 break;
1663 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1664 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1665 break;
1666 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1667 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1668 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02001669 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1670 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1671 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001672 default:
1673 dev_err(hdev->dev, "Bad internal queue index %d", i);
1674 rc = -EINVAL;
1675 goto free_internal_qmans_pq_mem;
1676 }
1677
1678 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1679 hdev, q->pq_size,
1680 &q->pq_dma_addr,
1681 GFP_KERNEL | __GFP_ZERO);
1682 if (!q->pq_kernel_addr) {
1683 rc = -ENOMEM;
1684 goto free_internal_qmans_pq_mem;
1685 }
1686 }
1687
1688 return 0;
1689
1690free_internal_qmans_pq_mem:
1691 gaudi_free_internal_qmans_pq_mem(hdev);
1692 return rc;
1693}
1694
Ohad Sharabic592c272021-04-21 13:03:21 +03001695static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1696{
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001697 struct asic_fixed_properties *prop = &hdev->asic_prop;
Ohad Sharabic592c272021-04-21 13:03:21 +03001698 struct pci_mem_region *region;
1699
1700 /* CFG */
1701 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1702 region->region_base = CFG_BASE;
1703 region->region_size = CFG_SIZE;
1704 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001705 region->bar_size = CFG_BAR_SIZE;
Ohad Sharabic592c272021-04-21 13:03:21 +03001706 region->bar_id = CFG_BAR_ID;
1707 region->used = 1;
1708
1709 /* SRAM */
1710 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1711 region->region_base = SRAM_BASE_ADDR;
1712 region->region_size = SRAM_SIZE;
1713 region->offset_in_bar = 0;
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001714 region->bar_size = SRAM_BAR_SIZE;
Ohad Sharabic592c272021-04-21 13:03:21 +03001715 region->bar_id = SRAM_BAR_ID;
1716 region->used = 1;
1717
1718 /* DRAM */
1719 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1720 region->region_base = DRAM_PHYS_BASE;
1721 region->region_size = hdev->asic_prop.dram_size;
1722 region->offset_in_bar = 0;
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001723 region->bar_size = prop->dram_pci_bar_size;
Ohad Sharabic592c272021-04-21 13:03:21 +03001724 region->bar_id = HBM_BAR_ID;
1725 region->used = 1;
1726
1727 /* SP SRAM */
1728 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1729 region->region_base = PSOC_SCRATCHPAD_ADDR;
1730 region->region_size = PSOC_SCRATCHPAD_SIZE;
1731 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
Ohad Sharabib31e59b2021-04-22 10:01:22 +03001732 region->bar_size = CFG_BAR_SIZE;
Ohad Sharabic592c272021-04-21 13:03:21 +03001733 region->bar_id = CFG_BAR_ID;
1734 region->used = 1;
1735}
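/*
 * Each region entry above lets common code translate a device address into
 * an offset inside the matching PCI BAR. A minimal sketch of the intended
 * translation (illustrative only):
 *
 *	u64 bar_off = addr - region->region_base + region->offset_in_bar;
 *
 * which is meaningful only while addr falls inside the region and bar_off
 * stays below region->bar_size.
 */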
1736
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001737static int gaudi_sw_init(struct hl_device *hdev)
1738{
1739 struct gaudi_device *gaudi;
Ofir Bittonebd8d122020-05-10 13:41:28 +03001740 u32 i, event_id = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001741 int rc;
1742
1743 /* Allocate device structure */
1744 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1745 if (!gaudi)
1746 return -ENOMEM;
1747
Ofir Bittonebd8d122020-05-10 13:41:28 +03001748 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1749 if (gaudi_irq_map_table[i].valid) {
1750 if (event_id == GAUDI_EVENT_SIZE) {
1751 dev_err(hdev->dev,
1752 "Event array exceeds the limit of %u events\n",
1753 GAUDI_EVENT_SIZE);
1754 rc = -EINVAL;
1755 goto free_gaudi_device;
1756 }
1757
1758 gaudi->events[event_id++] =
1759 gaudi_irq_map_table[i].fc_id;
1760 }
1761 }
1762
Oded Gabbay2f553422020-08-15 16:28:10 +03001763 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001764
1765 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1766
1767 hdev->asic_specific = gaudi;
1768
1769 /* Create DMA pool for small allocations */
1770 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1771 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1772 if (!hdev->dma_pool) {
1773 dev_err(hdev->dev, "failed to create DMA pool\n");
1774 rc = -ENOMEM;
1775 goto free_gaudi_device;
1776 }
1777
1778 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1779 if (rc)
1780 goto free_dma_pool;
1781
1782 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1783 if (!hdev->cpu_accessible_dma_pool) {
1784 dev_err(hdev->dev,
1785 "Failed to create CPU accessible DMA pool\n");
1786 rc = -ENOMEM;
1787 goto free_cpu_dma_mem;
1788 }
1789
1790 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1791 (uintptr_t) hdev->cpu_accessible_dma_mem,
1792 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1793 if (rc) {
1794 dev_err(hdev->dev,
1795 "Failed to add memory to CPU accessible DMA pool\n");
1796 rc = -EFAULT;
1797 goto free_cpu_accessible_dma_pool;
1798 }
1799
1800 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1801 if (rc)
1802 goto free_cpu_accessible_dma_pool;
1803
1804 spin_lock_init(&gaudi->hw_queues_lock);
1805 mutex_init(&gaudi->clk_gate_mutex);
1806
1807 hdev->supports_sync_stream = true;
1808 hdev->supports_coresight = true;
Ofir Bitton2795c882020-12-08 13:47:05 +02001809 hdev->supports_staged_submission = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001810
Ohad Sharabic592c272021-04-21 13:03:21 +03001811 gaudi_set_pci_memory_regions(hdev);
1812
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001813 return 0;
1814
1815free_cpu_accessible_dma_pool:
1816 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1817free_cpu_dma_mem:
Ohad Sharabi4cb45082021-05-20 09:09:03 +03001818 if (!hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03001819 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1820 hdev->cpu_pci_msb_addr);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001821 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1822 HL_CPU_ACCESSIBLE_MEM_SIZE,
1823 hdev->cpu_accessible_dma_mem,
1824 hdev->cpu_accessible_dma_address);
1825free_dma_pool:
1826 dma_pool_destroy(hdev->dma_pool);
1827free_gaudi_device:
1828 kfree(gaudi);
1829 return rc;
1830}
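/*
 * The CPU accessible DMA pool created above hands out chunks with 32-byte
 * granularity (ilog2(32) minimum allocation order) carved out of the single
 * coherent buffer allocated earlier. A minimal usage sketch, with a
 * hypothetical caller:
 *
 *	unsigned long va = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
 *	...
 *	gen_pool_free(hdev->cpu_accessible_dma_pool, va, size);
 */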
1831
1832static int gaudi_sw_fini(struct hl_device *hdev)
1833{
1834 struct gaudi_device *gaudi = hdev->asic_specific;
1835
1836 gaudi_free_internal_qmans_pq_mem(hdev);
1837
1838 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1839
Ohad Sharabi4cb45082021-05-20 09:09:03 +03001840 if (!hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03001841 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001842 hdev->cpu_pci_msb_addr);
Ofir Bittonc692dec2020-10-04 17:34:37 +03001843
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001844 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1845 HL_CPU_ACCESSIBLE_MEM_SIZE,
1846 hdev->cpu_accessible_dma_mem,
1847 hdev->cpu_accessible_dma_address);
1848
1849 dma_pool_destroy(hdev->dma_pool);
1850
1851 mutex_destroy(&gaudi->clk_gate_mutex);
1852
1853 kfree(gaudi);
1854
1855 return 0;
1856}
1857
1858static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1859{
1860 struct hl_device *hdev = arg;
1861 int i;
1862
1863 if (hdev->disabled)
1864 return IRQ_HANDLED;
1865
1866 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1867 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1868
1869 hl_irq_handler_eq(irq, &hdev->event_queue);
1870
1871 return IRQ_HANDLED;
1872}
1873
1874/*
1875 * For backward compatibility, new MSI interrupts should be set after the
1876 * existing CPU and NIC interrupts.
1877 */
1878static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1879 bool cpu_eq)
1880{
1881 int msi_vec;
1882
1883 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1884 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1885 GAUDI_EVENT_QUEUE_MSI_IDX);
1886
1887 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1888 (nr + NIC_NUMBER_OF_ENGINES + 1);
1889
1890 return pci_irq_vector(hdev->pdev, msi_vec);
1891}
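/*
 * For illustration, with NIC_NUMBER_OF_ENGINES == 10 the mapping above
 * works out to:
 *
 *	nr < GAUDI_EVENT_QUEUE_MSI_IDX			-> vector nr (CQ interrupts)
 *	nr == GAUDI_EVENT_QUEUE_MSI_IDX with cpu_eq	-> vector nr (CPU EQ)
 *	any other nr					-> vector nr + 10 + 1
 *
 * so newly added interrupts always land after the legacy CPU and NIC
 * vectors, as the comment above requires.
 */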
1892
1893static int gaudi_enable_msi_single(struct hl_device *hdev)
1894{
1895 int rc, irq;
1896
Oded Gabbay3b82c342020-11-27 18:10:20 +02001897 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001898
1899 irq = gaudi_pci_irq_vector(hdev, 0, false);
1900 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1901 "gaudi single msi", hdev);
1902 if (rc)
1903 dev_err(hdev->dev,
1904 "Failed to request single MSI IRQ\n");
1905
1906 return rc;
1907}
1908
1909static int gaudi_enable_msi_multi(struct hl_device *hdev)
1910{
1911 int cq_cnt = hdev->asic_prop.completion_queues_count;
1912 int rc, i, irq_cnt_init, irq;
1913
1914 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1915 irq = gaudi_pci_irq_vector(hdev, i, false);
1916 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1917 &hdev->completion_queue[i]);
1918 if (rc) {
1919 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1920 goto free_irqs;
1921 }
1922 }
1923
1924 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1925 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1926 &hdev->event_queue);
1927 if (rc) {
1928 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1929 goto free_irqs;
1930 }
1931
1932 return 0;
1933
1934free_irqs:
1935 for (i = 0 ; i < irq_cnt_init ; i++)
1936 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1937 &hdev->completion_queue[i]);
1938 return rc;
1939}
1940
1941static int gaudi_enable_msi(struct hl_device *hdev)
1942{
1943 struct gaudi_device *gaudi = hdev->asic_specific;
1944 int rc;
1945
1946 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1947 return 0;
1948
Oded Gabbay12e66a12021-03-08 18:06:57 +02001949 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001950 if (rc < 0) {
1951 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1952 return rc;
1953 }
1954
1955 if (rc < NUMBER_OF_INTERRUPTS) {
1956 gaudi->multi_msi_mode = false;
1957 rc = gaudi_enable_msi_single(hdev);
1958 } else {
1959 gaudi->multi_msi_mode = true;
1960 rc = gaudi_enable_msi_multi(hdev);
1961 }
1962
1963 if (rc)
1964 goto free_pci_irq_vectors;
1965
1966 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1967
1968 return 0;
1969
1970free_pci_irq_vectors:
1971 pci_free_irq_vectors(hdev->pdev);
1972 return rc;
1973}
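/*
 * Note that the pci_alloc_irq_vectors() call above requests exactly one MSI
 * vector (min and max are both 1), so on success rc is 1 and, assuming
 * NUMBER_OF_INTERRUPTS is greater than 1, the single-MSI path is the one
 * taken. The multi-MSI path remains in place for allocations that would
 * return NUMBER_OF_INTERRUPTS or more vectors.
 */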
1974
1975static void gaudi_sync_irqs(struct hl_device *hdev)
1976{
1977 struct gaudi_device *gaudi = hdev->asic_specific;
1978 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1979
1980 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1981 return;
1982
1983 /* Wait for all pending IRQs to be finished */
1984 if (gaudi->multi_msi_mode) {
1985 for (i = 0 ; i < cq_cnt ; i++)
1986 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1987
1988 synchronize_irq(gaudi_pci_irq_vector(hdev,
1989 GAUDI_EVENT_QUEUE_MSI_IDX,
1990 true));
1991 } else {
1992 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1993 }
1994}
1995
1996static void gaudi_disable_msi(struct hl_device *hdev)
1997{
1998 struct gaudi_device *gaudi = hdev->asic_specific;
1999 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2000
2001 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2002 return;
2003
2004 gaudi_sync_irqs(hdev);
2005
2006 if (gaudi->multi_msi_mode) {
2007 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2008 true);
2009 free_irq(irq, &hdev->event_queue);
2010
2011 for (i = 0 ; i < cq_cnt ; i++) {
2012 irq = gaudi_pci_irq_vector(hdev, i, false);
2013 free_irq(irq, &hdev->completion_queue[i]);
2014 }
2015 } else {
2016 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2017 }
2018
2019 pci_free_irq_vectors(hdev->pdev);
2020
2021 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2022}
2023
2024static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2025{
2026 struct gaudi_device *gaudi = hdev->asic_specific;
2027
Ohad Sharabi4cb45082021-05-20 09:09:03 +03002028 if (hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002029 return;
2030
Ohad Sharabi6a785e32021-05-29 23:26:10 +03002031 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2032 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002033 return;
2034
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002035 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2036 return;
2037
2038 if (!hdev->sram_scrambler_enable)
2039 return;
2040
2041 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2042 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2043 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2044 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2045 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2046 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2047 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2048 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2049 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2050 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2051 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2052 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2053 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2054 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2055 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2056 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2057
2058 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2059 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2060 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2061 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2062 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2063 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2064 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2065 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2066 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2067 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2068 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2069 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2070 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2071 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2072 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2073 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2074
2075 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2076 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2077 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2078 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2079 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2080 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2081 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2082 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2083 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2084 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2085 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2086 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2087 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2088 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2089 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2090 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2091
2092 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2093}
2094
2095static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2096{
2097 struct gaudi_device *gaudi = hdev->asic_specific;
2098
Ohad Sharabi4cb45082021-05-20 09:09:03 +03002099 if (hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002100 return;
2101
Ohad Sharabi6a785e32021-05-29 23:26:10 +03002102 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2103 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002104 return;
2105
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002106 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2107 return;
2108
2109 if (!hdev->dram_scrambler_enable)
2110 return;
2111
2112 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2113 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2114 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2115 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2116 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2117 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2118 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2119 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2120 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2121 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2122 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2123 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2124 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2125 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2126 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2127 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2128
2129 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2130 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2131 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2132 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2133 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2134 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2135 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2136 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2137 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2138 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2139 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2140 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2141 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2142 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2143 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2144 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2145
2146 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2147 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2148 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2149 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2150 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2151 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2152 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2153 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2154 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2155 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2156 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2157 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2158 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2159 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2160 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2161 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2162
2163 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2164}
2165
2166static void gaudi_init_e2e(struct hl_device *hdev)
2167{
Ohad Sharabi4cb45082021-05-20 09:09:03 +03002168 if (hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002169 return;
2170
Ohad Sharabi6a785e32021-05-29 23:26:10 +03002171 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2172 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002173 return;
2174
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002175 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2176 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2177 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2178 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2179
2180 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2181 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2182 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2183 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2184
2185 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2186 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2187 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2188 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2189
2190 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2191 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2192 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2193 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2194
2195 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2196 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2197 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2198 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2199
2200 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2201 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2202 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2203 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2204
2205 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2206 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2207 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2208 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2209
2210 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2211 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2212 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2213 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2214
2215 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2216 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2217 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2218 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2219
2220 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2221 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2222 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2223 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2224
2225 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2226 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2227 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2228 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2229
2230 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2231 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2232 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2233 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2234
2235 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2236 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2237 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2238 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2239
2240 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2241 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2242 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2243 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2244
2245 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2246 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2247 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2248 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2249
2250 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2251 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2252 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2253 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2254
2255 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2256 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2257 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2258 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2259
2260 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2261 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2262 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2263 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2264
2265 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2266 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2267 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2268 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2269
2270 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2271 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2272 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2273 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2274
2275 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2276 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2277 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2278 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2279
2280 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2281 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2282 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2283 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2284
2285 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2286 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2287 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2288 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2289
2290 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2291 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2292 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2293 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2294
2295 if (!hdev->dram_scrambler_enable) {
2296 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2297 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2298 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2299 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2300
2301 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2302 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2303 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2304 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2305
2306 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2307 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2308 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2309 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2310
2311 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2312 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2313 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2314 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2315
2316 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2317 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2318 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2319 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2320
2321 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2322 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2323 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2324 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2325
2326 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2327 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2328 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2329 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2330
2331 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2332 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2333 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2334 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2335
2336 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2337 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2338 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2339 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2340
2341 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2342 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2343 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2344 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2345
2346 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2347 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2348 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2349 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2350
2351 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2352 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2353 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2354 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2355
2356 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2357 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2358 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2359 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2360
2361 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2362 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2363 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2364 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2365
2366 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2367 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2368 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2369 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2370
2371 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2372 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2373 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2374 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2375
2376 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2377 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2378 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2379 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2380
2381 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2382 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2383 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2384 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2385
2386 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2387 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2388 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2389 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2390
2391 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2392 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2393 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2394 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2395
2396 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2397 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2398 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2399 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2400
2401 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2402 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2403 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2404 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2405
2406 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2407 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2408 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2409 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2410
2411 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2412 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2413 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2414 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2415 }
2416
2417 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2418 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2419 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2420 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2421
2422 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2423 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2424 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2425 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2426
2427 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2428 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2429 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2430 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2431
2432 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2433 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2434 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2435 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2436
2437 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2438 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2439 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2440 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2441
2442 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2443 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2444 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2445 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2446
2447 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2448 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2449 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2450 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2451
2452 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2453 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2454 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2455 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2456
2457 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2458 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2459 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2460 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2461
2462 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2463 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2464 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2465 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2466
2467 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2468 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2469 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2470 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2471
2472 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2473 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2474 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2475 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2476
2477 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2478 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2479 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2480 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2481
2482 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2483 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2484 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2485 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2486
2487 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2488 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2489 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2490 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2491
2492 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2493 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2494 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2495 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2496
2497 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2498 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2499 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2500 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2501
2502 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2503 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2504 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2505 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2506
2507 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2508 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2509 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2510 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2511
2512 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2513 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2514 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2515 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2516
2517 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2518 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2519 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2520 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2521
2522 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2523 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2524 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2525 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2526
2527 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2528 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2529 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2530 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2531
2532 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2533 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2534 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2535 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2536}
2537
2538static void gaudi_init_hbm_cred(struct hl_device *hdev)
2539{
2540 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2541
Ohad Sharabi4cb45082021-05-20 09:09:03 +03002542 if (hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002543 return;
2544
Ohad Sharabi6a785e32021-05-29 23:26:10 +03002545 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2546 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
Ofir Bittonc692dec2020-10-04 17:34:37 +03002547 return;
2548
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002549 hbm0_wr = 0x33333333;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002550 hbm0_rd = 0x77777777;
Oded Gabbay0b168c82020-06-15 19:25:57 +03002551 hbm1_wr = 0x55555555;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002552 hbm1_rd = 0xDDDDDDDD;
2553
2554 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2555 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2556 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2557 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2558
2559 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2560 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2561 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2562 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2563
2564 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2565 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2566 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2567 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2568
2569 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2570 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2571 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2572 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2573
2574 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2575 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2576 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2577 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2578 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2579 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2580 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2581 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2582 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2583 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2584 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2585 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2586
2587 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2588 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2589 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2590 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2591 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2592 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2593 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2594 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2595 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2596 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2597 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2598 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2599}
2600
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002601static void gaudi_init_golden_registers(struct hl_device *hdev)
2602{
2603 u32 tpc_offset;
2604 int tpc_id, i;
2605
2606 gaudi_init_e2e(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002607 gaudi_init_hbm_cred(hdev);
2608
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002609 for (tpc_id = 0, tpc_offset = 0;
2610 tpc_id < TPC_NUMBER_OF_ENGINES;
2611 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2612 /* Mask all arithmetic interrupts from TPC */
2613 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2614 /* Set 16 cache lines */
2615 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2616 ICACHE_FETCH_LINE_NUM, 2);
2617 }
2618
2619 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2620 for (i = 0 ; i < 128 ; i += 8)
2621 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2622
2623 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2624 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2625 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2626 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002627}
2628
2629static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2630 int qman_id, dma_addr_t qman_pq_addr)
2631{
Koby Elbaze591a492021-05-12 18:05:46 +03002632 struct cpu_dyn_regs *dyn_regs =
2633 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002634 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2635 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2636 u32 q_off, dma_qm_offset;
Koby Elbaz81217362021-05-03 23:03:15 +03002637 u32 dma_qm_err_cfg, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002638
2639 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2640
2641 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2642 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2643 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2644 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2645 so_base_en_lo = lower_32_bits(CFG_BASE +
2646 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2647 so_base_en_hi = upper_32_bits(CFG_BASE +
2648 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2649 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2650 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2651 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2652 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2653 so_base_ws_lo = lower_32_bits(CFG_BASE +
2654 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2655 so_base_ws_hi = upper_32_bits(CFG_BASE +
2656 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2657
2658 q_off = dma_qm_offset + qman_id * 4;
2659
2660 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2661 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2662
2663 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2664 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2665 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2666
Ofir Bitton25121d92020-09-24 08:22:58 +03002667 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2668 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2669 QMAN_LDMA_SRC_OFFSET);
2670 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2671 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002672
2673 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2674 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2675 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2676 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2677 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2678 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2679 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2680 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2681
Omer Shpigelmance043262020-06-16 17:56:27 +03002682 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2683
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002684 /* The following configuration is needed only once per QMAN */
2685 if (qman_id == 0) {
Koby Elbaz81217362021-05-03 23:03:15 +03002686 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2687 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Koby Elbaze591a492021-05-12 18:05:46 +03002688 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03002689
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002690 /* Configure RAZWI IRQ */
2691 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03002692 if (hdev->stop_on_err)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002693 dma_qm_err_cfg |=
2694 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002695
2696 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03002697
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002698 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002699 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002700 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002701 upper_32_bits(CFG_BASE + irq_handler_offset));
2702
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002703 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2704 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2705 dma_id);
2706
2707 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2708 QM_ARB_ERR_MSG_EN_MASK);
2709
2710 /* Increase ARB WDT to support streams architecture */
2711 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2712 GAUDI_ARB_WDT_TIMEOUT);
2713
2714 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2715 QMAN_EXTERNAL_MAKE_TRUSTED);
2716
2717 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2718 }
2719}
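/*
 * The q_off computation above relies on the per-stream QMAN registers
 * (PQ_BASE_LO_0..3, PQ_PI_0..3 and friends) being laid out 4 bytes apart,
 * so that, for example (illustrative only):
 *
 *	WREG32(mmDMA0_QM_PQ_PI_0 + dma_qm_offset + qman_id * 4, 0);
 *
 * addresses the PI register of stream 'qman_id' in the selected DMA QMAN.
 */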
2720
2721static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2722{
Koby Elbaze591a492021-05-12 18:05:46 +03002723 struct cpu_dyn_regs *dyn_regs =
2724 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002725 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
Koby Elbaz81217362021-05-03 23:03:15 +03002726 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2727 u32 irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002728
2729 /* Set to maximum possible according to physical size */
2730 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2731 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2732
Oded Gabbayd1f36332020-09-14 09:26:54 +03002733 /* WA for H/W bug H3-2116 */
2734 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2735
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002736 /* STOP_ON bit implies no completion to operation in case of RAZWI */
2737 if (hdev->stop_on_err)
2738 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2739
2740 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03002741
2742 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2743 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Koby Elbaze591a492021-05-12 18:05:46 +03002744 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03002745
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002746 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002747 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002748 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002749 upper_32_bits(CFG_BASE + irq_handler_offset));
2750
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002751 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2752 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2753 WREG32(mmDMA0_CORE_PROT + dma_offset,
2754 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2755 /* If the channel is secured, it should be in MMU bypass mode */
2756 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2757 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2758 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2759}
2760
2761static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2762 u32 enable_mask)
2763{
2764 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2765
2766 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2767}
2768
2769static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2770{
2771 struct gaudi_device *gaudi = hdev->asic_specific;
2772 struct hl_hw_queue *q;
2773 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2774
2775 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2776 return;
2777
2778 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2779 dma_id = gaudi_dma_assignment[i];
2780 /*
2781		 * For queues after the CPU queue we need to add 1 to get the
2782		 * correct queue index. In addition, we need to skip over the CPU EQ
2783		 * and the NIC IRQs in order to get the correct MSI vector.
2784 */
2785 if (dma_id > 1) {
2786 cpu_skip = 1;
2787 nic_skip = NIC_NUMBER_OF_ENGINES;
2788 } else {
2789 cpu_skip = 0;
2790 nic_skip = 0;
2791 }
2792
2793 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2794 q_idx = 4 * dma_id + j + cpu_skip;
2795 q = &hdev->kernel_queues[q_idx];
2796 q->cq_id = cq_id++;
2797 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2798 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2799 q->bus_address);
2800 }
2801
2802 gaudi_init_dma_core(hdev, dma_id);
2803
2804 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2805 }
2806
2807 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2808}
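/*
 * For illustration: a channel whose dma_id is 0 or 1 gets kernel queue
 * indices 4 * dma_id + stream and MSI vectors allocated from 0 upwards,
 * while a channel with dma_id > 1 is shifted by one queue slot (to skip
 * the CPU queue) and by NIC_NUMBER_OF_ENGINES + 1 MSI vectors (to skip
 * the NIC IRQs and the CPU EQ). Which dma_ids are actually used here is
 * decided by gaudi_dma_assignment[], so treat this as a sketch of the
 * bookkeeping rather than a fixed mapping.
 */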
2809
2810static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2811 int qman_id, u64 qman_base_addr)
2812{
Koby Elbaze591a492021-05-12 18:05:46 +03002813 struct cpu_dyn_regs *dyn_regs =
2814 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Ofir Bitton5de406c2020-09-10 10:56:26 +03002815 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2816 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Koby Elbaz81217362021-05-03 23:03:15 +03002817 u32 dma_qm_err_cfg, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002818 u32 q_off, dma_qm_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002819
2820 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2821
Ofir Bitton5de406c2020-09-10 10:56:26 +03002822 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2823 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2824 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002825 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002826 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002827 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002828 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002829 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002830 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2831 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2832 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2833 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2834 so_base_ws_lo = lower_32_bits(CFG_BASE +
2835 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2836 so_base_ws_hi = upper_32_bits(CFG_BASE +
2837 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002838
2839 q_off = dma_qm_offset + qman_id * 4;
2840
2841 if (qman_id < 4) {
2842 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2843 lower_32_bits(qman_base_addr));
2844 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2845 upper_32_bits(qman_base_addr));
2846
2847 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2848 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2849 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2850
Ofir Bitton25121d92020-09-24 08:22:58 +03002851 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2852 QMAN_CPDMA_SIZE_OFFSET);
2853 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2854 QMAN_CPDMA_SRC_OFFSET);
2855 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2856 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002857 } else {
Koby Elbaz81217362021-05-03 23:03:15 +03002858 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03002859 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2860 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03002861
Ofir Bitton25121d92020-09-24 08:22:58 +03002862 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2863 QMAN_LDMA_SIZE_OFFSET);
2864 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2865 QMAN_LDMA_SRC_OFFSET);
2866 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
Oded Gabbay5b94d6e2020-09-25 20:14:15 +03002867 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002868
2869 /* Configure RAZWI IRQ */
2870 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03002871 if (hdev->stop_on_err)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002872 dma_qm_err_cfg |=
2873 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03002874
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002875 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2876
2877 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002878 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002879 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03002880 upper_32_bits(CFG_BASE + irq_handler_offset));
2881
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002882 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2883 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2884 dma_id);
2885
2886 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2887 QM_ARB_ERR_MSG_EN_MASK);
2888
2889 /* Increase ARB WDT to support streams architecture */
2890 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2891 GAUDI_ARB_WDT_TIMEOUT);
2892
2893 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2894 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2895 QMAN_INTERNAL_MAKE_TRUSTED);
2896 }
2897
Ofir Bitton5de406c2020-09-10 10:56:26 +03002898 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2899 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2900 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2901 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2902
2903 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2904 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2905 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2906 mtr_base_ws_lo);
2907 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2908 mtr_base_ws_hi);
2909 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2910 so_base_ws_lo);
2911 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2912 so_base_ws_hi);
2913 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002914}
2915
2916static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2917{
2918 struct gaudi_device *gaudi = hdev->asic_specific;
2919 struct gaudi_internal_qman_info *q;
2920 u64 qman_base_addr;
2921 int i, j, dma_id, internal_q_index;
2922
2923 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2924 return;
2925
2926 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2927 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2928
2929 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2930 /*
2931			 * Add 1 for the CPU queue in order to get the correct
2932			 * queue index, as all internal queues are placed after it
2933 */
2934 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2935
2936 q = &gaudi->internal_qmans[internal_q_index];
2937 qman_base_addr = (u64) q->pq_dma_addr;
2938 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2939 qman_base_addr);
2940 }
2941
2942 /* Initializing lower CP for HBM DMA QMAN */
2943 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2944
2945 gaudi_init_dma_core(hdev, dma_id);
2946
2947 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2948 }
2949
2950 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2951}
2952
2953static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2954 int qman_id, u64 qman_base_addr)
2955{
Koby Elbaze591a492021-05-12 18:05:46 +03002956 struct cpu_dyn_regs *dyn_regs =
2957 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002958 u32 mtr_base_lo, mtr_base_hi;
2959 u32 so_base_lo, so_base_hi;
Koby Elbaz81217362021-05-03 23:03:15 +03002960 u32 irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002961 u32 q_off, mme_id;
2962 u32 mme_qm_err_cfg;
2963
2964 mtr_base_lo = lower_32_bits(CFG_BASE +
2965 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2966 mtr_base_hi = upper_32_bits(CFG_BASE +
2967 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2968 so_base_lo = lower_32_bits(CFG_BASE +
2969 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2970 so_base_hi = upper_32_bits(CFG_BASE +
2971 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2972
2973 q_off = mme_offset + qman_id * 4;
2974
2975 if (qman_id < 4) {
2976 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2977 lower_32_bits(qman_base_addr));
2978 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2979 upper_32_bits(qman_base_addr));
2980
2981 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2982 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2983 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2984
Ofir Bitton25121d92020-09-24 08:22:58 +03002985 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2986 QMAN_CPDMA_SIZE_OFFSET);
2987 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2988 QMAN_CPDMA_SRC_OFFSET);
2989 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2990 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002991 } else {
Koby Elbaz81217362021-05-03 23:03:15 +03002992 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03002993 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2994 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03002995
Ofir Bitton25121d92020-09-24 08:22:58 +03002996 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2997 QMAN_LDMA_SIZE_OFFSET);
2998 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2999 QMAN_LDMA_SRC_OFFSET);
3000 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3001 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003002
3003 /* Configure RAZWI IRQ */
3004 mme_id = mme_offset /
Koby Elbazb92c6372021-05-19 15:16:52 +03003005 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003006
3007 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03003008 if (hdev->stop_on_err)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003009 mme_qm_err_cfg |=
3010 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03003011
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003012 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03003013
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003014 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003015 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003016 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003017 upper_32_bits(CFG_BASE + irq_handler_offset));
3018
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003019 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3020 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3021 mme_id);
3022
3023 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3024 QM_ARB_ERR_MSG_EN_MASK);
3025
3026 /* Increase ARB WDT to support streams architecture */
3027 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3028 GAUDI_ARB_WDT_TIMEOUT);
3029
3030 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3031 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3032 QMAN_INTERNAL_MAKE_TRUSTED);
3033 }
3034
3035 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3036 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3037 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3038 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3039}
3040
3041static void gaudi_init_mme_qmans(struct hl_device *hdev)
3042{
3043 struct gaudi_device *gaudi = hdev->asic_specific;
3044 struct gaudi_internal_qman_info *q;
3045 u64 qman_base_addr;
3046 u32 mme_offset;
3047 int i, internal_q_index;
3048
3049 if (gaudi->hw_cap_initialized & HW_CAP_MME)
3050 return;
3051
3052 /*
3053 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3054 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3055 */
3056
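	/*
	 * The loop below programs the GAUDI_QUEUE_ID_MME_0_* streams against
	 * the MME2 base (N_W MME); once mme_offset wraps to 0 after i == 3,
	 * the GAUDI_QUEUE_ID_MME_1_* streams are programmed against the MME0
	 * base (S_W MME), with qman_id always taken modulo 4 (i & 0x3).
	 */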
3057 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3058
3059 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3060 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3061 q = &gaudi->internal_qmans[internal_q_index];
3062 qman_base_addr = (u64) q->pq_dma_addr;
3063 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3064 qman_base_addr);
3065 if (i == 3)
3066 mme_offset = 0;
3067 }
3068
3069 /* Initializing lower CP for MME QMANs */
3070 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3071 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3072 gaudi_init_mme_qman(hdev, 0, 4, 0);
3073
3074 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3075 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3076
3077 gaudi->hw_cap_initialized |= HW_CAP_MME;
3078}
3079
3080static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3081 int qman_id, u64 qman_base_addr)
3082{
Koby Elbaze591a492021-05-12 18:05:46 +03003083 struct cpu_dyn_regs *dyn_regs =
3084 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Ofir Bitton5de406c2020-09-10 10:56:26 +03003085 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3086 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Koby Elbaz81217362021-05-03 23:03:15 +03003087 u32 tpc_qm_err_cfg, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003088 u32 q_off, tpc_id;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003089
Ofir Bitton5de406c2020-09-10 10:56:26 +03003090 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3091 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3092 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003093 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003094 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003095 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003096 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003097 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003098 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3099 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3100 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3101 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3102 so_base_ws_lo = lower_32_bits(CFG_BASE +
3103 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3104 so_base_ws_hi = upper_32_bits(CFG_BASE +
3105 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003106
3107 q_off = tpc_offset + qman_id * 4;
3108
Ofir Bitton5de406c2020-09-10 10:56:26 +03003109 tpc_id = tpc_offset /
3110 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3111
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003112 if (qman_id < 4) {
3113 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3114 lower_32_bits(qman_base_addr));
3115 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3116 upper_32_bits(qman_base_addr));
3117
3118 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3119 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3120 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3121
Ofir Bitton25121d92020-09-24 08:22:58 +03003122 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3123 QMAN_CPDMA_SIZE_OFFSET);
3124 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3125 QMAN_CPDMA_SRC_OFFSET);
3126 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3127 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003128 } else {
Koby Elbaz81217362021-05-03 23:03:15 +03003129 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03003130 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3131 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03003132
Ofir Bitton25121d92020-09-24 08:22:58 +03003133 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3134 QMAN_LDMA_SIZE_OFFSET);
3135 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3136 QMAN_LDMA_SRC_OFFSET);
3137 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3138 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003139
3140 /* Configure RAZWI IRQ */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003141 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03003142 if (hdev->stop_on_err)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003143 tpc_qm_err_cfg |=
3144 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003145
3146 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03003147
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003148 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003149 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003150 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003151 upper_32_bits(CFG_BASE + irq_handler_offset));
3152
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003153 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3154 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3155 tpc_id);
3156
3157 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3158 QM_ARB_ERR_MSG_EN_MASK);
3159
3160 /* Increase ARB WDT to support streams architecture */
3161 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3162 GAUDI_ARB_WDT_TIMEOUT);
3163
3164 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3165 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3166 QMAN_INTERNAL_MAKE_TRUSTED);
3167 }
3168
Ofir Bitton5de406c2020-09-10 10:56:26 +03003169 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3170 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3171 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3172 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3173
3174 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3175 if (tpc_id == 6) {
3176 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3177 mtr_base_ws_lo);
3178 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3179 mtr_base_ws_hi);
3180 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3181 so_base_ws_lo);
3182 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3183 so_base_ws_hi);
3184 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003185}
3186
3187static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3188{
3189 struct gaudi_device *gaudi = hdev->asic_specific;
3190 struct gaudi_internal_qman_info *q;
3191 u64 qman_base_addr;
3192 u32 so_base_hi, tpc_offset = 0;
3193 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3194 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3195 int i, tpc_id, internal_q_index;
3196
3197 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3198 return;
3199
3200 so_base_hi = upper_32_bits(CFG_BASE +
3201 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3202
3203 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3204 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3205 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3206 tpc_id * QMAN_STREAMS + i;
3207 q = &gaudi->internal_qmans[internal_q_index];
3208 qman_base_addr = (u64) q->pq_dma_addr;
3209 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3210 qman_base_addr);
3211
3212 if (i == 3) {
3213 /* Initializing lower CP for TPC QMAN */
3214 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3215
3216 /* Enable the QMAN and TPC channel */
3217 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3218 QMAN_TPC_ENABLE);
3219 }
3220 }
3221
3222 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3223 so_base_hi);
3224
3225 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3226
Oded Gabbay65887292020-08-12 11:21:01 +03003227 gaudi->hw_cap_initialized |=
3228 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003229 }
3230}
3231
Oded Gabbay3c681572020-11-02 21:10:39 +02003232static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3233 int qman_id, u64 qman_base_addr, int nic_id)
3234{
Koby Elbaze591a492021-05-12 18:05:46 +03003235 struct cpu_dyn_regs *dyn_regs =
3236 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Ofir Bitton5de406c2020-09-10 10:56:26 +03003237 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3238 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Koby Elbaz81217362021-05-03 23:03:15 +03003239 u32 nic_qm_err_cfg, irq_handler_offset;
Oded Gabbay3c681572020-11-02 21:10:39 +02003240 u32 q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02003241
Ofir Bitton5de406c2020-09-10 10:56:26 +03003242 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3243 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3244 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003245 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003246 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003247 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003248 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003249 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003250 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3251 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3252 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3253 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3254 so_base_ws_lo = lower_32_bits(CFG_BASE +
3255 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3256 so_base_ws_hi = upper_32_bits(CFG_BASE +
3257 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbay3c681572020-11-02 21:10:39 +02003258
3259 q_off = nic_offset + qman_id * 4;
3260
3261 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3262 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3263
3264 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3265 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3266 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3267
Ofir Bitton5de406c2020-09-10 10:56:26 +03003268 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3269 QMAN_LDMA_SIZE_OFFSET);
3270 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3271 QMAN_LDMA_SRC_OFFSET);
3272 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3273 QMAN_LDMA_DST_OFFSET);
Oded Gabbay3c681572020-11-02 21:10:39 +02003274
Ofir Bitton5de406c2020-09-10 10:56:26 +03003275 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3276 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3277 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3278 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3279
3280 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3281 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3282 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3283 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3284 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
Oded Gabbay3c681572020-11-02 21:10:39 +02003285
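	/*
	 * Every NIC stream gets its own PQ above; the RAZWI/error IRQ and
	 * arbitration setup below is done only once per QMAN, on stream 0.
	 */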
3286 if (qman_id == 0) {
Koby Elbaz81217362021-05-03 23:03:15 +03003287 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03003288 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3289 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
Koby Elbaz81217362021-05-03 23:03:15 +03003290
Oded Gabbay3c681572020-11-02 21:10:39 +02003291 /* Configure RAZWI IRQ */
3292 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
Tomer Tayarae151bc2021-05-24 22:35:06 +03003293 if (hdev->stop_on_err)
Oded Gabbay3c681572020-11-02 21:10:39 +02003294 nic_qm_err_cfg |=
3295 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
Oded Gabbay3c681572020-11-02 21:10:39 +02003296
3297 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
Koby Elbaz81217362021-05-03 23:03:15 +03003298
Oded Gabbay3c681572020-11-02 21:10:39 +02003299 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003300 lower_32_bits(CFG_BASE + irq_handler_offset));
Oded Gabbay3c681572020-11-02 21:10:39 +02003301 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
Koby Elbaz81217362021-05-03 23:03:15 +03003302 upper_32_bits(CFG_BASE + irq_handler_offset));
3303
Oded Gabbay3c681572020-11-02 21:10:39 +02003304 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3305 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3306 nic_id);
3307
3308 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3309 QM_ARB_ERR_MSG_EN_MASK);
3310
3311 /* Increase ARB WDT to support streams architecture */
3312 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3313 GAUDI_ARB_WDT_TIMEOUT);
3314
3315 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3316 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3317 QMAN_INTERNAL_MAKE_TRUSTED);
3318 }
3319}
3320
3321static void gaudi_init_nic_qmans(struct hl_device *hdev)
3322{
3323 struct gaudi_device *gaudi = hdev->asic_specific;
3324 struct gaudi_internal_qman_info *q;
3325 u64 qman_base_addr;
3326 u32 nic_offset = 0;
3327 u32 nic_delta_between_qmans =
3328 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3329 u32 nic_delta_between_nics =
3330 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3331 int i, nic_id, internal_q_index;
3332
3333 if (!hdev->nic_ports_mask)
3334 return;
3335
3336 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3337 return;
3338
3339 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3340
3341 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3342 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3343 nic_offset += nic_delta_between_qmans;
3344 if (nic_id & 1) {
3345 nic_offset -= (nic_delta_between_qmans * 2);
3346 nic_offset += nic_delta_between_nics;
3347 }
3348 continue;
3349 }
3350
3351 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3352 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3353 nic_id * QMAN_STREAMS + i;
3354 q = &gaudi->internal_qmans[internal_q_index];
3355 qman_base_addr = (u64) q->pq_dma_addr;
3356 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3357 qman_base_addr, nic_id);
3358 }
3359
3360 /* Enable the QMAN */
3361 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3362
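		/*
		 * Each NIC macro hosts two QMANs: advance by one QMAN delta,
		 * and after the odd (second) engine rewind two QMAN deltas and
		 * jump by one NIC-to-NIC delta to reach the next NIC macro.
		 */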
3363 nic_offset += nic_delta_between_qmans;
3364 if (nic_id & 1) {
3365 nic_offset -= (nic_delta_between_qmans * 2);
3366 nic_offset += nic_delta_between_nics;
3367 }
3368
3369 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3370 }
3371}
3372
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003373static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3374{
3375 struct gaudi_device *gaudi = hdev->asic_specific;
3376
3377 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3378 return;
3379
3380 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3381 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3382 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3383}
3384
3385static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3386{
3387 struct gaudi_device *gaudi = hdev->asic_specific;
3388
3389 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3390 return;
3391
3392 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3393 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3394 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3395 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3396 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3397}
3398
3399static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3400{
3401 struct gaudi_device *gaudi = hdev->asic_specific;
3402
3403 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3404 return;
3405
3406 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3407 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3408}
3409
3410static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3411{
3412 struct gaudi_device *gaudi = hdev->asic_specific;
3413 u32 tpc_offset = 0;
3414 int tpc_id;
3415
3416 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3417 return;
3418
3419 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3420 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3421 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3422 }
3423}
3424
Oded Gabbay3c681572020-11-02 21:10:39 +02003425static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3426{
3427 struct gaudi_device *gaudi = hdev->asic_specific;
3428 u32 nic_mask, nic_offset = 0;
3429 u32 nic_delta_between_qmans =
3430 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3431 u32 nic_delta_between_nics =
3432 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3433 int nic_id;
3434
3435 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3436 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3437
3438 if (gaudi->hw_cap_initialized & nic_mask)
3439 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3440
3441 nic_offset += nic_delta_between_qmans;
3442 if (nic_id & 1) {
3443 nic_offset -= (nic_delta_between_qmans * 2);
3444 nic_offset += nic_delta_between_nics;
3445 }
3446 }
3447}
3448
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003449static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3450{
3451 struct gaudi_device *gaudi = hdev->asic_specific;
3452
3453 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3454 return;
3455
3456 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3457 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3458 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3459 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3460}
3461
3462static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3463{
3464 struct gaudi_device *gaudi = hdev->asic_specific;
3465
3466 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3467 return;
3468
3469 /* Stop CPs of HBM DMA QMANs */
3470
3471 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3472 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3473 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3474 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3475 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3476}
3477
3478static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3479{
3480 struct gaudi_device *gaudi = hdev->asic_specific;
3481
3482 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3483 return;
3484
3485 /* Stop CPs of MME QMANs */
3486 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3487 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3488}
3489
3490static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3491{
3492 struct gaudi_device *gaudi = hdev->asic_specific;
3493
3494 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3495 return;
3496
3497 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3498 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3499 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3500 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3501 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3502 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3503 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3504 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3505}
3506
Oded Gabbay3c681572020-11-02 21:10:39 +02003507static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3508{
3509 struct gaudi_device *gaudi = hdev->asic_specific;
3510
3511 /* Stop upper CPs of QMANs */
3512
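	/*
	 * Each of the five NIC macros hosts two QMANs (QM0/QM1), so there is
	 * one stop write per possible port, each gated by its HW_CAP_NICx bit.
	 */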
3513 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3514 WREG32(mmNIC0_QM0_GLBL_CFG1,
3515 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3516 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3517 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3518
3519 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3520 WREG32(mmNIC0_QM1_GLBL_CFG1,
3521 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3522 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3523 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3524
3525 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3526 WREG32(mmNIC1_QM0_GLBL_CFG1,
3527 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3528 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3529 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3530
3531 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3532 WREG32(mmNIC1_QM1_GLBL_CFG1,
3533 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3534 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3535 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3536
3537 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3538 WREG32(mmNIC2_QM0_GLBL_CFG1,
3539 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3540 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3541 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3542
3543 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3544 WREG32(mmNIC2_QM1_GLBL_CFG1,
3545 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3546 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3547 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3548
3549 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3550 WREG32(mmNIC3_QM0_GLBL_CFG1,
3551 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3552 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3553 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3554
3555 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3556 WREG32(mmNIC3_QM1_GLBL_CFG1,
3557 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3558 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3559 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3560
3561 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3562 WREG32(mmNIC4_QM0_GLBL_CFG1,
3563 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3564 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3565 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3566
3567 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3568 WREG32(mmNIC4_QM1_GLBL_CFG1,
3569 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3570 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3571 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3572}
3573
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003574static void gaudi_pci_dma_stall(struct hl_device *hdev)
3575{
3576 struct gaudi_device *gaudi = hdev->asic_specific;
3577
3578 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3579 return;
3580
3581 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3582 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3583 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3584}
3585
3586static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3587{
3588 struct gaudi_device *gaudi = hdev->asic_specific;
3589
3590 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3591 return;
3592
3593 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3594 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3595 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3596 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3597 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3598}
3599
3600static void gaudi_mme_stall(struct hl_device *hdev)
3601{
3602 struct gaudi_device *gaudi = hdev->asic_specific;
3603
3604 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3605 return;
3606
3607 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3608 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3609 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3610 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3611 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3612 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3613 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3614 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3615 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3616 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3617 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3618 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3619 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3620 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3621 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3622 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3623 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3624}
3625
3626static void gaudi_tpc_stall(struct hl_device *hdev)
3627{
3628 struct gaudi_device *gaudi = hdev->asic_specific;
3629
3630 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3631 return;
3632
3633 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3634 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3635 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3636 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3637 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3638 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3639 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3640 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3641}
3642
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003643static void gaudi_set_clock_gating(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003644{
3645 struct gaudi_device *gaudi = hdev->asic_specific;
3646 u32 qman_offset;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003647 bool enable;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003648 int i;
3649
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003650	/* If we are in a debug session, don't enable clock gating
 3651	 * as it may interfere
3652 */
3653 if (hdev->in_debug)
3654 return;
3655
Ohad Sharabi4cb45082021-05-20 09:09:03 +03003656 if (hdev->asic_prop.fw_security_enabled)
Oded Gabbay0024c0942020-12-05 22:55:09 +02003657 return;
3658
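	/*
	 * Clock gating is controlled per engine: the engine's bit in
	 * hdev->clock_gating_mask decides whether its QMAN CGM registers are
	 * programmed with the power-gate values or cleared.
	 */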
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003659 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003660 enable = !!(hdev->clock_gating_mask &
3661 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003662
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003663 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003664 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3665 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003666 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003667 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003668 }
3669
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003670 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003671 enable = !!(hdev->clock_gating_mask &
3672 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003673
Oded Gabbayda5dfbb2021-02-06 19:34:59 +02003674		/* The GC sends work to the DMA engine through the upper CP
 3675		 * in DMA5, so clock gating must not be enabled for that DMA
3676 */
3677 if (i == GAUDI_HBM_DMA_4)
3678 enable = 0;
3679
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003680 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003681 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3682 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003683 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003684 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003685 }
3686
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003687 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3688 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3689 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003690
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003691 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3692 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3693 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003694
3695 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003696 enable = !!(hdev->clock_gating_mask &
3697 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003698
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003699 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003700 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003701 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003702 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003703
3704 qman_offset += TPC_QMAN_OFFSET;
3705 }
3706
3707 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3708}
3709
3710static void gaudi_disable_clock_gating(struct hl_device *hdev)
3711{
3712 struct gaudi_device *gaudi = hdev->asic_specific;
3713 u32 qman_offset;
3714 int i;
3715
Ohad Sharabi4cb45082021-05-20 09:09:03 +03003716 if (hdev->asic_prop.fw_security_enabled)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003717 return;
3718
3719 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3720 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3721 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3722
3723 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3724 }
3725
3726 WREG32(mmMME0_QM_CGM_CFG, 0);
3727 WREG32(mmMME0_QM_CGM_CFG1, 0);
3728 WREG32(mmMME2_QM_CGM_CFG, 0);
3729 WREG32(mmMME2_QM_CGM_CFG1, 0);
3730
3731 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3732 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3733 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3734
3735 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3736 }
3737
3738 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3739}
3740
3741static void gaudi_enable_timestamp(struct hl_device *hdev)
3742{
3743 /* Disable the timestamp counter */
3744 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3745
3746 /* Zero the lower/upper parts of the 64-bit counter */
3747 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3748 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3749
3750 /* Enable the counter */
3751 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3752}
3753
3754static void gaudi_disable_timestamp(struct hl_device *hdev)
3755{
3756 /* Disable the timestamp counter */
3757 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3758}
3759
3760static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3761{
Oded Gabbayc83c4172020-07-05 15:48:34 +03003762 u32 wait_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003763
3764 dev_info(hdev->dev,
3765 "Halting compute engines and disabling interrupts\n");
3766
Oded Gabbayc83c4172020-07-05 15:48:34 +03003767 if (hdev->pldm)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003768 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003769 else
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003770 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003771
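	/*
	 * Ordering: first stop the QMAN command processors, then stall the
	 * engine cores themselves, and only then disable the QMANs, the
	 * timestamp counter and MSI, with a settle delay between the stages.
	 */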
Oded Gabbay3c681572020-11-02 21:10:39 +02003772 gaudi_stop_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003773 gaudi_stop_mme_qmans(hdev);
3774 gaudi_stop_tpc_qmans(hdev);
3775 gaudi_stop_hbm_dma_qmans(hdev);
3776 gaudi_stop_pci_dma_qmans(hdev);
3777
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003778 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003779
3780 msleep(wait_timeout_ms);
3781
3782 gaudi_pci_dma_stall(hdev);
3783 gaudi_hbm_dma_stall(hdev);
3784 gaudi_tpc_stall(hdev);
3785 gaudi_mme_stall(hdev);
3786
3787 msleep(wait_timeout_ms);
3788
Oded Gabbay3c681572020-11-02 21:10:39 +02003789 gaudi_disable_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003790 gaudi_disable_mme_qmans(hdev);
3791 gaudi_disable_tpc_qmans(hdev);
3792 gaudi_disable_hbm_dma_qmans(hdev);
3793 gaudi_disable_pci_dma_qmans(hdev);
3794
3795 gaudi_disable_timestamp(hdev);
3796
Oded Gabbay12ae3132020-07-03 20:58:23 +03003797 gaudi_disable_msi(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003798}
3799
3800static int gaudi_mmu_init(struct hl_device *hdev)
3801{
3802 struct asic_fixed_properties *prop = &hdev->asic_prop;
3803 struct gaudi_device *gaudi = hdev->asic_specific;
3804 u64 hop0_addr;
3805 int rc, i;
3806
3807 if (!hdev->mmu_enable)
3808 return 0;
3809
3810 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3811 return 0;
3812
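	/*
	 * Each ASID gets its own hop0 table; the tables are laid out
	 * contiguously in the MMU page-table area, mmu_hop_table_size apart.
	 */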
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003813 for (i = 0 ; i < prop->max_asid ; i++) {
3814 hop0_addr = prop->mmu_pgt_addr +
3815 (i * prop->mmu_hop_table_size);
3816
3817 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3818 if (rc) {
3819 dev_err(hdev->dev,
3820 "failed to set hop0 addr for asid %d\n", i);
3821 goto err;
3822 }
3823 }
3824
3825 /* init MMU cache manage page */
3826 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3827 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3828
Tomer Tayar644883e2020-07-19 11:00:03 +03003829 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003830
3831 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3832 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3833
3834 WREG32(mmSTLB_HOP_CONFIGURATION,
3835 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3836
Omer Shpigelmancfd41762020-06-03 13:03:35 +03003837 /*
3838 * The H/W expects the first PI after init to be 1. After wraparound
3839 * we'll write 0.
3840 */
3841 gaudi->mmu_cache_inv_pi = 1;
3842
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003843 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3844
3845 return 0;
3846
3847err:
3848 return rc;
3849}
3850
3851static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3852{
3853 void __iomem *dst;
3854
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003855 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3856
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003857 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003858}
3859
3860static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3861{
3862 void __iomem *dst;
3863
3864 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3865
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003866 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003867}
3868
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003869static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3870{
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003871 struct dynamic_fw_load_mgr *dynamic_loader;
3872 struct cpu_dyn_regs *dyn_regs;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003873
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003874 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3875
3876 /*
 3877	 * Here we set initial values for a few specific dynamic regs, as
 3878	 * they have to be hard-coded before the first descriptor is read
 3879	 * from the FW. In later stages of the protocol these values are
 3880	 * updated automatically by reading the FW descriptor, so the data
 3881	 * there is always up-to-date
3882 */
3883 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3884 dyn_regs->kmd_msg_to_cpu =
3885 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3886 dyn_regs->cpu_cmd_status_to_host =
3887 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
Ohad Sharabib31e59b2021-04-22 10:01:22 +03003888
3889 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003890}
3891
3892static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3893{
3894 struct static_fw_load_mgr *static_loader;
3895
3896 static_loader = &hdev->fw_loader.static_loader;
3897
3898 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3899 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003900 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3901 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003902 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
Ohad Sharabie67a60402021-05-02 15:45:21 +03003903 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3904 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003905 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
Ohad Sharabie67a60402021-05-02 15:45:21 +03003906 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003907 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3908 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
Koby Elbaz69dbbba2021-06-17 17:04:16 +03003909 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
Koby Elbazb7a71fd2021-06-15 17:07:02 +03003910 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3911 GAUDI_PLDM_RESET_WAIT_MSEC :
3912 GAUDI_CPU_RESET_WAIT_MSEC;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003913}
3914
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003915static void gaudi_init_firmware_loader(struct hl_device *hdev)
3916{
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003917 struct asic_fixed_properties *prop = &hdev->asic_prop;
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003918 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3919
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003920 /* fill common fields */
Koby Elbaz3649eae2021-05-18 15:43:47 +03003921 fw_loader->linux_loaded = false;
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003922 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3923 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003924 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3925 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3926 fw_loader->skip_bmc = !hdev->bmc_enable;
Ohad Sharabi50f036d2021-04-11 15:26:28 +03003927 fw_loader->sram_bar_id = SRAM_BAR_ID;
Ohad Sharabi8a43c832021-04-11 10:32:18 +03003928 fw_loader->dram_bar_id = HBM_BAR_ID;
Ohad Sharabi22a795b2021-04-08 13:42:00 +03003929
3930 if (prop->dynamic_fw_load)
3931 gaudi_init_dynamic_firmware_loader(hdev);
3932 else
3933 gaudi_init_static_firmware_loader(hdev);
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003934}
3935
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003936static int gaudi_init_cpu(struct hl_device *hdev)
3937{
3938 struct gaudi_device *gaudi = hdev->asic_specific;
3939 int rc;
3940
Ofir Bitton6a2f5d72021-02-15 13:23:04 +02003941 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003942 return 0;
3943
3944 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3945 return 0;
3946
3947 /*
 3948	 * The device CPU works with 40-bit addresses.
3949 * This register sets the extension to 50 bits.
3950 */
Ohad Sharabi4cb45082021-05-20 09:09:03 +03003951 if (!hdev->asic_prop.fw_security_enabled)
Ofir Bittonc692dec2020-10-04 17:34:37 +03003952 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003953
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03003954 rc = hl_fw_init_cpu(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003955
3956 if (rc)
3957 return rc;
3958
3959 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3960
3961 return 0;
3962}
3963
3964static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3965{
Koby Elbaze591a492021-05-12 18:05:46 +03003966 struct cpu_dyn_regs *dyn_regs =
3967 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Ohad Sharabi5b6b7802021-02-02 13:33:34 +02003968 struct asic_fixed_properties *prop = &hdev->asic_prop;
Koby Elbaz81217362021-05-03 23:03:15 +03003969 struct gaudi_device *gaudi = hdev->asic_specific;
3970 u32 status, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003971 struct hl_eq *eq;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003972 struct hl_hw_queue *cpu_pq =
3973 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3974 int err;
3975
3976 if (!hdev->cpu_queues_enable)
3977 return 0;
3978
3979 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3980 return 0;
3981
3982 eq = &hdev->event_queue;
3983
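	/*
	 * Hand the device CPU the host bus addresses and sizes of the PQ,
	 * the EQ and the CPU-accessible DMA region before signalling that
	 * queue init may proceed.
	 */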
3984 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3985 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3986
3987 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3988 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3989
3990 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3991 lower_32_bits(hdev->cpu_accessible_dma_address));
3992 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3993 upper_32_bits(hdev->cpu_accessible_dma_address));
3994
3995 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3996 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3997 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3998
3999 /* Used for EQ CI */
4000 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4001
4002 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4003
4004 if (gaudi->multi_msi_mode)
4005 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4006 else
4007 WREG32(mmCPU_IF_QUEUE_INIT,
4008 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4009
Koby Elbaz81217362021-05-03 23:03:15 +03004010 irq_handler_offset = prop->gic_interrupts_enable ?
4011 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Ofir Bitton5bc691d2021-05-25 22:09:13 +03004012 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
Koby Elbaz81217362021-05-03 23:03:15 +03004013
Ofir Bitton7d5ba002021-06-07 15:22:56 +03004014 WREG32(irq_handler_offset,
4015 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004016
4017 err = hl_poll_timeout(
4018 hdev,
4019 mmCPU_IF_QUEUE_INIT,
4020 status,
4021 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4022 1000,
4023 cpu_timeout);
4024
4025 if (err) {
4026 dev_err(hdev->dev,
Oded Gabbay6138bbe2020-09-04 20:18:16 +03004027 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004028 return -EIO;
4029 }
4030
Ohad Sharabi5b6b7802021-02-02 13:33:34 +02004031 /* update FW application security bits */
Ohad Sharabie67a60402021-05-02 15:45:21 +03004032 if (prop->fw_cpu_boot_dev_sts0_valid)
4033 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4034 if (prop->fw_cpu_boot_dev_sts1_valid)
4035 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
Ohad Sharabi5b6b7802021-02-02 13:33:34 +02004036
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004037 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4038 return 0;
4039}
4040
4041static void gaudi_pre_hw_init(struct hl_device *hdev)
4042{
4043 /* Perform read from the device to make sure device is up */
Oded Gabbay377182a2020-12-09 19:50:46 +02004044 RREG32(mmHW_STATE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004045
Ohad Sharabi4cb45082021-05-20 09:09:03 +03004046 if (!hdev->asic_prop.fw_security_enabled) {
Ofir Bittonc692dec2020-10-04 17:34:37 +03004047 /* Set the access through PCI bars (Linux driver only) as
4048 * secured
4049 */
4050 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4051 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4052 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
Oded Gabbay57799ce2020-09-13 15:51:28 +03004053
Ofir Bittonc692dec2020-10-04 17:34:37 +03004054 /* Perform read to flush the waiting writes to ensure
4055 * configuration was set in the device
4056 */
4057 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4058 }
Oded Gabbay57799ce2020-09-13 15:51:28 +03004059
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004060 /*
4061 * Let's mark in the H/W that we have reached this point. We check
4062 * this value in the reset_before_init function to understand whether
4063 * we need to reset the chip before doing H/W init. This register is
4064 * cleared by the H/W upon H/W reset
4065 */
4066 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004067}
4068
4069static int gaudi_hw_init(struct hl_device *hdev)
4070{
Ofir Bitton1dae12f2021-05-12 09:07:39 +03004071 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004072 int rc;
4073
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004074 gaudi_pre_hw_init(hdev);
4075
Ofir Bitton1dae12f2021-05-12 09:07:39 +03004076	/* If the iATU is configured by FW, the HBM bar ALWAYS points to
 4077	 * DRAM_PHYS_BASE. So we set it here, and if anyone tries to move it
 4078	 * later to a different address, there will be an error
4079 */
4080 if (hdev->asic_prop.iatu_done_by_fw)
4081 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4082
4083 /*
4084 * Before pushing u-boot/linux to device, need to set the hbm bar to
4085 * base address of dram
4086 */
4087 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4088 dev_err(hdev->dev,
4089 "failed to map HBM bar to DRAM base address\n");
4090 return -EIO;
4091 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004092
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004093 rc = gaudi_init_cpu(hdev);
4094 if (rc) {
4095 dev_err(hdev->dev, "failed to initialize CPU\n");
4096 return rc;
4097 }
4098
Oded Gabbay0024c0942020-12-05 22:55:09 +02004099	/* If clock gating was enabled in preboot, it must be disabled here
 4100	 * before touching the MME/TPC registers.
 4101	 * There is no need to take the clock gating mutex because no other
 4102	 * relevant code can run while this function runs
4103 */
4104 hdev->asic_funcs->disable_clock_gating(hdev);
4105
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004106 /* SRAM scrambler must be initialized after CPU is running from HBM */
4107 gaudi_init_scrambler_sram(hdev);
4108
4109 /* This is here just in case we are working without CPU */
4110 gaudi_init_scrambler_hbm(hdev);
4111
4112 gaudi_init_golden_registers(hdev);
4113
4114 rc = gaudi_mmu_init(hdev);
4115 if (rc)
4116 return rc;
4117
Omer Shpigelman3a3a5bf12020-05-11 10:45:12 +03004118 gaudi_init_security(hdev);
4119
Koby Elbaze591a492021-05-12 18:05:46 +03004120 gaudi_init_pci_dma_qmans(hdev);
4121
4122 gaudi_init_hbm_dma_qmans(hdev);
4123
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004124 gaudi_init_mme_qmans(hdev);
4125
4126 gaudi_init_tpc_qmans(hdev);
4127
Oded Gabbay3c681572020-11-02 21:10:39 +02004128 gaudi_init_nic_qmans(hdev);
4129
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004130 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004131
4132 gaudi_enable_timestamp(hdev);
4133
Oded Gabbay3c681572020-11-02 21:10:39 +02004134 /* MSI must be enabled before CPU queues and NIC are initialized */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004135 rc = gaudi_enable_msi(hdev);
4136 if (rc)
4137 goto disable_queues;
4138
4139 /* must be called after MSI was enabled */
4140 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4141 if (rc) {
4142 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4143 rc);
4144 goto disable_msi;
4145 }
4146
4147 /* Perform read from the device to flush all configuration */
Oded Gabbay377182a2020-12-09 19:50:46 +02004148 RREG32(mmHW_STATE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004149
4150 return 0;
4151
4152disable_msi:
4153 gaudi_disable_msi(hdev);
4154disable_queues:
4155 gaudi_disable_mme_qmans(hdev);
4156 gaudi_disable_pci_dma_qmans(hdev);
4157
4158 return rc;
4159}
4160
4161static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4162{
Koby Elbaze591a492021-05-12 18:05:46 +03004163 struct cpu_dyn_regs *dyn_regs =
4164 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Koby Elbaz81217362021-05-03 23:03:15 +03004165 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
Oded Gabbaya60d0752021-05-23 19:00:49 +03004166 struct gaudi_device *gaudi = hdev->asic_specific;
4167 bool driver_performs_reset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004168
Oded Gabbay12ae3132020-07-03 20:58:23 +03004169 if (!hard_reset) {
4170 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4171 return;
4172 }
4173
Oded Gabbayc83c4172020-07-05 15:48:34 +03004174 if (hdev->pldm) {
Oded Gabbay12ae3132020-07-03 20:58:23 +03004175 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03004176 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4177 } else {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004178 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03004179 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4180 }
4181
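	/*
	 * The driver toggles the reset registers itself only when the FW
	 * neither secures the device nor performs the hard reset on the
	 * driver's behalf; otherwise the FW owns the reset flow.
	 */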
Oded Gabbaya60d0752021-05-23 19:00:49 +03004182 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4183 !hdev->asic_prop.hard_reset_done_by_fw);
4184
Oded Gabbayc83c4172020-07-05 15:48:34 +03004185 /* Set device to handle FLR by H/W as we will put the device CPU to
4186 * halt mode
4187 */
Oded Gabbaya60d0752021-05-23 19:00:49 +03004188 if (driver_performs_reset)
Ofir Bittonb90c8942020-11-08 12:59:04 +02004189 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
Oded Gabbayc83c4172020-07-05 15:48:34 +03004190 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4191
Oded Gabbaya60d0752021-05-23 19:00:49 +03004192	/* If Linux is loaded on the device CPU we need to communicate with it
4193	 * via the GIC. Otherwise, we use COMMS or the MSG_TO_CPU registers
4194	 * in the case of older F/W versions
Oded Gabbayc83c4172020-07-05 15:48:34 +03004195 */
Koby Elbaz3649eae2021-05-18 15:43:47 +03004196 if (hdev->fw_loader.linux_loaded) {
4197 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4198 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Ofir Bitton5bc691d2021-05-25 22:09:13 +03004199 le32_to_cpu(dyn_regs->gic_host_halt_irq);
Koby Elbaz81217362021-05-03 23:03:15 +03004200
Ofir Bitton7d5ba002021-06-07 15:22:56 +03004201 WREG32(irq_handler_offset,
4202 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
Oded Gabbaya60d0752021-05-23 19:00:49 +03004203 } else {
4204 if (hdev->asic_prop.hard_reset_done_by_fw)
Koby Elbazb7a71fd2021-06-15 17:07:02 +03004205 hl_fw_ask_hard_reset_without_linux(hdev);
Oded Gabbaya60d0752021-05-23 19:00:49 +03004206 else
Koby Elbazb7a71fd2021-06-15 17:07:02 +03004207 hl_fw_ask_halt_machine_without_linux(hdev);
Koby Elbaz3649eae2021-05-18 15:43:47 +03004208 }
Oded Gabbayc83c4172020-07-05 15:48:34 +03004209
Oded Gabbaya60d0752021-05-23 19:00:49 +03004210 if (driver_performs_reset) {
Oded Gabbaya63c3fb2020-11-26 18:11:05 +02004211
4212 /* Configure the reset registers. Must be done as early as
4213 * possible in case we fail during H/W initialization
4214 */
4215 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4216 (CFG_RST_H_DMA_MASK |
4217 CFG_RST_H_MME_MASK |
4218 CFG_RST_H_SM_MASK |
4219 CFG_RST_H_TPC_7_MASK));
4220
4221 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4222
4223 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4224 (CFG_RST_H_HBM_MASK |
4225 CFG_RST_H_TPC_7_MASK |
4226 CFG_RST_H_NIC_MASK |
4227 CFG_RST_H_SM_MASK |
4228 CFG_RST_H_DMA_MASK |
4229 CFG_RST_H_MME_MASK |
4230 CFG_RST_H_CPU_MASK |
4231 CFG_RST_H_MMU_MASK));
4232
4233 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4234 (CFG_RST_L_IF_MASK |
4235 CFG_RST_L_PSOC_MASK |
4236 CFG_RST_L_TPC_MASK));
4237
Ofir Bittonb90c8942020-11-08 12:59:04 +02004238 msleep(cpu_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004239
Ofir Bittonb90c8942020-11-08 12:59:04 +02004240 /* Tell ASIC not to re-initialize PCIe */
4241 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004242
Ofir Bittonb90c8942020-11-08 12:59:04 +02004243 /* Restart BTL/BLR upon hard-reset */
Oded Gabbaya60d0752021-05-23 19:00:49 +03004244 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004245
Ofir Bittonb90c8942020-11-08 12:59:04 +02004246 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
Oded Gabbay12ae3132020-07-03 20:58:23 +03004247 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
Ofir Bittonb90c8942020-11-08 12:59:04 +02004248
Oded Gabbay13d0ee12020-12-06 23:48:45 +02004249 dev_info(hdev->dev,
4250 "Issued HARD reset command, going to wait %dms\n",
4251 reset_timeout_ms);
4252 } else {
4253 dev_info(hdev->dev,
4254 "Firmware performs HARD reset, going to wait %dms\n",
4255 reset_timeout_ms);
4256 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004257
4258 /*
4259 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4260 * itself is in reset. Need to wait until the reset is deasserted
4261 */
4262 msleep(reset_timeout_ms);
4263
4264 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4265 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4266 dev_err(hdev->dev,
4267 "Timeout while waiting for device to reset 0x%x\n",
4268 status);
4269
farah kassabrieb10b892020-10-14 15:17:36 +03004270 if (gaudi) {
4271 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4272 HW_CAP_HBM | HW_CAP_PCI_DMA |
4273 HW_CAP_MME | HW_CAP_TPC_MASK |
4274 HW_CAP_HBM_DMA | HW_CAP_PLL |
4275 HW_CAP_NIC_MASK | HW_CAP_MMU |
4276 HW_CAP_SRAM_SCRAMBLER |
4277 HW_CAP_HBM_SCRAMBLER |
4278 HW_CAP_CLK_GATE);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004279
farah kassabrieb10b892020-10-14 15:17:36 +03004280 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
Oded Gabbaya60d0752021-05-23 19:00:49 +03004281
Koby Elbazb7a71fd2021-06-15 17:07:02 +03004282 hdev->device_cpu_is_halted = false;
farah kassabrieb10b892020-10-14 15:17:36 +03004283 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004284}
4285
4286static int gaudi_suspend(struct hl_device *hdev)
4287{
4288 int rc;
4289
Oded Gabbay2f553422020-08-15 16:28:10 +03004290 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004291 if (rc)
4292 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4293
4294 return rc;
4295}
4296
4297static int gaudi_resume(struct hl_device *hdev)
4298{
4299 return gaudi_init_iatu(hdev);
4300}
4301
4302static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
Hillf Danton0db57532020-08-23 07:32:42 +08004303 void *cpu_addr, dma_addr_t dma_addr, size_t size)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004304{
4305 int rc;
4306
4307 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4308 VM_DONTCOPY | VM_NORESERVE;
4309
Oded Gabbaya9d4ef62021-01-11 13:49:38 +02004310 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4311 (dma_addr - HOST_PHYS_BASE), size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004312 if (rc)
Hillf Danton0db57532020-08-23 07:32:42 +08004313 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004314
4315 return rc;
4316}
4317
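/*
 * Ring a queue's doorbell by writing the new PI value to the matching
 * QMAN PQ_PI register. For the CPU PQ, also notify the device CPU through
 * the PI-update interrupt (GIC register or dynamic F/W register).
 */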
4318static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4319{
Koby Elbaze591a492021-05-12 18:05:46 +03004320 struct cpu_dyn_regs *dyn_regs =
4321 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Koby Elbaz81217362021-05-03 23:03:15 +03004322 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004323 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004324 bool invalid_queue = false;
Ofir Bittona3972582021-05-24 22:58:44 +03004325 int dma_id;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004326
4327 switch (hw_queue_id) {
4328 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4329 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4330 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4331 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4332 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4333 break;
4334
4335 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4336 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4337 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4338 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4339 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4340 break;
4341
4342 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4343 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4344 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4345 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4346 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4347 break;
4348
4349 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4350 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4351 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4352 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4353 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4354 break;
4355
4356 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4357 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4358 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4359 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4360 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4361 break;
4362
4363 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004364 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4365 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4366 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4367 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4368 break;
4369
Ofir Bitton0940cab2020-08-31 08:52:56 +03004370 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004371 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4372 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4373 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4374 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4375 break;
4376
Ofir Bitton0940cab2020-08-31 08:52:56 +03004377 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4378 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4379 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4380 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4381 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4382 break;
4383
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004384 case GAUDI_QUEUE_ID_CPU_PQ:
4385 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4386 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4387 else
4388 invalid_queue = true;
4389 break;
4390
4391 case GAUDI_QUEUE_ID_MME_0_0:
4392 db_reg_offset = mmMME2_QM_PQ_PI_0;
4393 break;
4394
4395 case GAUDI_QUEUE_ID_MME_0_1:
4396 db_reg_offset = mmMME2_QM_PQ_PI_1;
4397 break;
4398
4399 case GAUDI_QUEUE_ID_MME_0_2:
4400 db_reg_offset = mmMME2_QM_PQ_PI_2;
4401 break;
4402
4403 case GAUDI_QUEUE_ID_MME_0_3:
4404 db_reg_offset = mmMME2_QM_PQ_PI_3;
4405 break;
4406
4407 case GAUDI_QUEUE_ID_MME_1_0:
4408 db_reg_offset = mmMME0_QM_PQ_PI_0;
4409 break;
4410
4411 case GAUDI_QUEUE_ID_MME_1_1:
4412 db_reg_offset = mmMME0_QM_PQ_PI_1;
4413 break;
4414
4415 case GAUDI_QUEUE_ID_MME_1_2:
4416 db_reg_offset = mmMME0_QM_PQ_PI_2;
4417 break;
4418
4419 case GAUDI_QUEUE_ID_MME_1_3:
4420 db_reg_offset = mmMME0_QM_PQ_PI_3;
4421 break;
4422
4423 case GAUDI_QUEUE_ID_TPC_0_0:
4424 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4425 break;
4426
4427 case GAUDI_QUEUE_ID_TPC_0_1:
4428 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4429 break;
4430
4431 case GAUDI_QUEUE_ID_TPC_0_2:
4432 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4433 break;
4434
4435 case GAUDI_QUEUE_ID_TPC_0_3:
4436 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4437 break;
4438
4439 case GAUDI_QUEUE_ID_TPC_1_0:
4440 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4441 break;
4442
4443 case GAUDI_QUEUE_ID_TPC_1_1:
4444 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4445 break;
4446
4447 case GAUDI_QUEUE_ID_TPC_1_2:
4448 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4449 break;
4450
4451 case GAUDI_QUEUE_ID_TPC_1_3:
4452 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4453 break;
4454
4455 case GAUDI_QUEUE_ID_TPC_2_0:
4456 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4457 break;
4458
4459 case GAUDI_QUEUE_ID_TPC_2_1:
4460 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4461 break;
4462
4463 case GAUDI_QUEUE_ID_TPC_2_2:
4464 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4465 break;
4466
4467 case GAUDI_QUEUE_ID_TPC_2_3:
4468 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4469 break;
4470
4471 case GAUDI_QUEUE_ID_TPC_3_0:
4472 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4473 break;
4474
4475 case GAUDI_QUEUE_ID_TPC_3_1:
4476 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4477 break;
4478
4479 case GAUDI_QUEUE_ID_TPC_3_2:
4480 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4481 break;
4482
4483 case GAUDI_QUEUE_ID_TPC_3_3:
4484 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4485 break;
4486
4487 case GAUDI_QUEUE_ID_TPC_4_0:
4488 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4489 break;
4490
4491 case GAUDI_QUEUE_ID_TPC_4_1:
4492 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4493 break;
4494
4495 case GAUDI_QUEUE_ID_TPC_4_2:
4496 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4497 break;
4498
4499 case GAUDI_QUEUE_ID_TPC_4_3:
4500 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4501 break;
4502
4503 case GAUDI_QUEUE_ID_TPC_5_0:
4504 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4505 break;
4506
4507 case GAUDI_QUEUE_ID_TPC_5_1:
4508 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4509 break;
4510
4511 case GAUDI_QUEUE_ID_TPC_5_2:
4512 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4513 break;
4514
4515 case GAUDI_QUEUE_ID_TPC_5_3:
4516 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4517 break;
4518
4519 case GAUDI_QUEUE_ID_TPC_6_0:
4520 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4521 break;
4522
4523 case GAUDI_QUEUE_ID_TPC_6_1:
4524 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4525 break;
4526
4527 case GAUDI_QUEUE_ID_TPC_6_2:
4528 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4529 break;
4530
4531 case GAUDI_QUEUE_ID_TPC_6_3:
4532 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4533 break;
4534
4535 case GAUDI_QUEUE_ID_TPC_7_0:
4536 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4537 break;
4538
4539 case GAUDI_QUEUE_ID_TPC_7_1:
4540 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4541 break;
4542
4543 case GAUDI_QUEUE_ID_TPC_7_2:
4544 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4545 break;
4546
4547 case GAUDI_QUEUE_ID_TPC_7_3:
4548 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4549 break;
4550
Ofir Bittona3972582021-05-24 22:58:44 +03004551 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4552 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4553 invalid_queue = true;
4554
4555 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4556 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004557 break;
4558
Ofir Bittona3972582021-05-24 22:58:44 +03004559 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4560 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4561 invalid_queue = true;
4562
4563 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4564 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004565 break;
4566
Ofir Bittona3972582021-05-24 22:58:44 +03004567 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4568 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4569 invalid_queue = true;
4570
4571 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4572 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004573 break;
4574
Ofir Bittona3972582021-05-24 22:58:44 +03004575 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4576 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4577 invalid_queue = true;
4578
4579 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4580 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004581 break;
4582
Ofir Bittona3972582021-05-24 22:58:44 +03004583 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4584 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4585 invalid_queue = true;
4586
4587 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4588 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004589 break;
4590
Ofir Bittona3972582021-05-24 22:58:44 +03004591 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4592 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4593 invalid_queue = true;
4594
4595 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4596 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004597 break;
4598
Ofir Bittona3972582021-05-24 22:58:44 +03004599 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4600 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4601 invalid_queue = true;
4602
4603 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4604 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004605 break;
4606
Ofir Bittona3972582021-05-24 22:58:44 +03004607 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4608 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4609 invalid_queue = true;
4610
4611 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4612 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004613 break;
4614
Ofir Bittona3972582021-05-24 22:58:44 +03004615 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4616 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4617 invalid_queue = true;
4618
4619 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4620 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004621 break;
4622
Ofir Bittona3972582021-05-24 22:58:44 +03004623 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4624 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4625 invalid_queue = true;
Oded Gabbay3c681572020-11-02 21:10:39 +02004626
Ofir Bittona3972582021-05-24 22:58:44 +03004627 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4628 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
Oded Gabbay3c681572020-11-02 21:10:39 +02004629 break;
4630
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004631 default:
4632 invalid_queue = true;
4633 }
4634
4635 if (invalid_queue) {
4636 /* Should never get here */
4637 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4638 hw_queue_id);
4639 return;
4640 }
4641
4642 db_value = pi;
4643
4644 /* ring the doorbell */
4645 WREG32(db_reg_offset, db_value);
4646
Ofir Bitton5dbd7b42021-01-28 16:30:25 +02004647 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4648 /* make sure device CPU will read latest data from host */
4649 mb();
Koby Elbaz81217362021-05-03 23:03:15 +03004650
4651 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
Koby Elbaze591a492021-05-12 18:05:46 +03004652 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Ofir Bitton5bc691d2021-05-25 22:09:13 +03004653 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
Koby Elbaz81217362021-05-03 23:03:15 +03004654
Ofir Bitton7d5ba002021-06-07 15:22:56 +03004655 WREG32(irq_handler_offset,
4656 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
Ofir Bitton5dbd7b42021-01-28 16:30:25 +02004657 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004658}
4659
4660static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4661 struct hl_bd *bd)
4662{
4663 __le64 *pbd = (__le64 *) bd;
4664
4665	/* The QMAN PQs are in host memory, so a simple copy suffices */
4666 pqe[0] = pbd[0];
4667 pqe[1] = pbd[1];
4668}
4669
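/*
 * DMA addresses handed to the device are shifted by HOST_PHYS_BASE on
 * allocation/mapping, and shifted back before being passed to the kernel
 * DMA API on free/unmap, since the device sees host memory at that base.
 */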
4670static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4671 dma_addr_t *dma_handle, gfp_t flags)
4672{
4673 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4674 dma_handle, flags);
4675
4676 /* Shift to the device's base physical address of host memory */
4677 if (kernel_addr)
4678 *dma_handle += HOST_PHYS_BASE;
4679
4680 return kernel_addr;
4681}
4682
4683static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4684 void *cpu_addr, dma_addr_t dma_handle)
4685{
4686 /* Cancel the device's base physical address of host memory */
4687 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4688
4689 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4690}
4691
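/*
 * Scrub (memset to zero) the user HBM space by splitting it into chunks of
 * up to 2GB and issuing a memset transfer on each DMA core in parallel,
 * then polling the DMA busy bit until all engines are done.
 */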
farah kassabri03df1362020-05-06 11:17:38 +03004692static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4693{
4694 struct asic_fixed_properties *prop = &hdev->asic_prop;
4695 u64 cur_addr = DRAM_BASE_ADDR_USER;
4696 u32 val;
4697 u32 chunk_size;
4698 int rc, dma_id;
4699
4700 while (cur_addr < prop->dram_end_address) {
4701 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4702 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4703
4704 chunk_size =
4705 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4706
4707 dev_dbg(hdev->dev,
4708 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4709 cur_addr, cur_addr + chunk_size);
4710
4711 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4712 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4713 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4714 lower_32_bits(cur_addr));
4715 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4716 upper_32_bits(cur_addr));
4717 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4718 chunk_size);
4719 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4720 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4721 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4722
4723 cur_addr += chunk_size;
4724
4725 if (cur_addr == prop->dram_end_address)
4726 break;
4727 }
4728
4729 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4730 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4731
4732 rc = hl_poll_timeout(
4733 hdev,
4734 mmDMA0_CORE_STS0 + dma_offset,
4735 val,
4736 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4737 1000,
4738 HBM_SCRUBBING_TIMEOUT_US);
4739
4740 if (rc) {
4741 dev_err(hdev->dev,
4742 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4743 dma_id);
4744 return -EIO;
4745 }
4746 }
4747 }
4748
4749 return 0;
4750}
4751
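/*
 * Full-device scrub (addr == 0 && size == 0): wait until the device is
 * idle, memset the user SRAM with a fixed pattern, then scrub the HBM with
 * clock gating disabled. Does nothing unless memory scrubbing is enabled.
 */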
4752static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4753{
4754 struct asic_fixed_properties *prop = &hdev->asic_prop;
4755 struct gaudi_device *gaudi = hdev->asic_specific;
farah kassabri03df1362020-05-06 11:17:38 +03004756 int rc = 0;
4757 u64 val = 0;
4758
4759 if (!hdev->memory_scrub)
4760 return 0;
4761
4762 if (!addr && !size) {
4763 /* Wait till device is idle */
4764 rc = hl_poll_timeout(
4765 hdev,
4766 mmDMA0_CORE_STS0/* dummy */,
4767 val/* dummy */,
Ohad Sharabicf303392021-01-17 16:01:56 +02004768 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4769 0, NULL)),
farah kassabri03df1362020-05-06 11:17:38 +03004770 1000,
4771 HBM_SCRUBBING_TIMEOUT_US);
4772 if (rc) {
4773 dev_err(hdev->dev, "waiting for idle timeout\n");
4774 return -EIO;
4775 }
4776
4777 /* Scrub SRAM */
4778 addr = prop->sram_user_base_address;
4779 size = hdev->pldm ? 0x10000 :
4780 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4781 val = 0x7777777777777777ull;
4782
4783 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4784 if (rc) {
4785 dev_err(hdev->dev,
4786 "Failed to clear SRAM in mem scrub all\n");
4787 return rc;
4788 }
4789
4790 mutex_lock(&gaudi->clk_gate_mutex);
4791 hdev->asic_funcs->disable_clock_gating(hdev);
4792
4793 /* Scrub HBM using all DMA channels in parallel */
4794 rc = gaudi_hbm_scrubbing(hdev);
4795 if (rc)
4796 dev_err(hdev->dev,
4797 "Failed to clear HBM in mem scrub all\n");
4798
4799 hdev->asic_funcs->set_clock_gating(hdev);
4800 mutex_unlock(&gaudi->clk_gate_mutex);
4801 }
4802
4803 return rc;
4804}
4805
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004806static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4807 u32 queue_id, dma_addr_t *dma_handle,
4808 u16 *queue_len)
4809{
4810 struct gaudi_device *gaudi = hdev->asic_specific;
4811 struct gaudi_internal_qman_info *q;
4812
4813 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4814 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4815 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4816 return NULL;
4817 }
4818
4819 q = &gaudi->internal_qmans[queue_id];
4820 *dma_handle = q->pq_dma_addr;
4821 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4822
4823 return q->pq_kernel_addr;
4824}
4825
4826static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
Alon Mizrahi439bc472020-11-10 13:49:10 +02004827 u16 len, u32 timeout, u64 *result)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004828{
4829 struct gaudi_device *gaudi = hdev->asic_specific;
4830
4831 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4832 if (result)
4833 *result = 0;
4834 return 0;
4835 }
4836
Oded Gabbay788cacf2020-07-07 17:30:13 +03004837 if (!timeout)
4838 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4839
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004840 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4841 timeout, result);
4842}
4843
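/*
 * Queue sanity test: allocate a fence word in host memory, submit a
 * MSG_PROT packet through the tested queue that writes a known value to
 * that word, and poll it until the value arrives or the timeout expires.
 */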
4844static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4845{
4846 struct packet_msg_prot *fence_pkt;
4847 dma_addr_t pkt_dma_addr;
4848 u32 fence_val, tmp, timeout_usec;
4849 dma_addr_t fence_dma_addr;
4850 u32 *fence_ptr;
4851 int rc;
4852
4853 if (hdev->pldm)
4854 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4855 else
4856 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4857
4858 fence_val = GAUDI_QMAN0_FENCE_VAL;
4859
4860 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4861 &fence_dma_addr);
4862 if (!fence_ptr) {
4863 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004864 "Failed to allocate memory for H/W queue %d testing\n",
4865 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004866 return -ENOMEM;
4867 }
4868
4869 *fence_ptr = 0;
4870
4871 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4872 sizeof(struct packet_msg_prot),
4873 GFP_KERNEL, &pkt_dma_addr);
4874 if (!fence_pkt) {
4875 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004876 "Failed to allocate packet for H/W queue %d testing\n",
4877 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004878 rc = -ENOMEM;
4879 goto free_fence_ptr;
4880 }
4881
Oded Gabbay65887292020-08-12 11:21:01 +03004882 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4883 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4884 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4885
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004886 fence_pkt->ctl = cpu_to_le32(tmp);
4887 fence_pkt->value = cpu_to_le32(fence_val);
4888 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4889
4890 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4891 sizeof(struct packet_msg_prot),
4892 pkt_dma_addr);
4893 if (rc) {
4894 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004895 "Failed to send fence packet to H/W queue %d\n",
4896 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004897 goto free_pkt;
4898 }
4899
4900 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4901 1000, timeout_usec, true);
4902
4903 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4904
4905 if (rc == -ETIMEDOUT) {
4906 dev_err(hdev->dev,
4907 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4908 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4909 rc = -EIO;
4910 }
4911
4912free_pkt:
4913 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4914 pkt_dma_addr);
4915free_fence_ptr:
4916 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4917 fence_dma_addr);
4918 return rc;
4919}
4920
4921static int gaudi_test_cpu_queue(struct hl_device *hdev)
4922{
4923 struct gaudi_device *gaudi = hdev->asic_specific;
4924
4925 /*
4926	 * Check the CPU queue capability here because send_cpu_message() won't
4927	 * update the result value when the capability is missing
4928 */
4929 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4930 return 0;
4931
4932 return hl_fw_test_cpu_queue(hdev);
4933}
4934
4935static int gaudi_test_queues(struct hl_device *hdev)
4936{
4937 int i, rc, ret_val = 0;
4938
Ofir Bitton3abc99b2020-06-23 14:50:39 +03004939 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004940 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4941 rc = gaudi_test_queue(hdev, i);
4942 if (rc)
4943 ret_val = -EINVAL;
4944 }
4945 }
4946
4947 rc = gaudi_test_cpu_queue(hdev);
4948 if (rc)
4949 ret_val = -EINVAL;
4950
4951 return ret_val;
4952}
4953
4954static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4955 gfp_t mem_flags, dma_addr_t *dma_handle)
4956{
4957 void *kernel_addr;
4958
4959 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4960 return NULL;
4961
4962 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4963
4964 /* Shift to the device's base physical address of host memory */
4965 if (kernel_addr)
4966 *dma_handle += HOST_PHYS_BASE;
4967
4968 return kernel_addr;
4969}
4970
4971static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4972 dma_addr_t dma_addr)
4973{
4974 /* Cancel the device's base physical address of host memory */
4975 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4976
4977 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4978}
4979
4980static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4981 size_t size, dma_addr_t *dma_handle)
4982{
4983 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4984}
4985
4986static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4987 size_t size, void *vaddr)
4988{
4989 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4990}
4991
4992static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4993 int nents, enum dma_data_direction dir)
4994{
4995 struct scatterlist *sg;
4996 int i;
4997
4998 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4999 return -ENOMEM;
5000
5001 /* Shift to the device's base physical address of host memory */
5002 for_each_sg(sgl, sg, nents, i)
5003 sg->dma_address += HOST_PHYS_BASE;
5004
5005 return 0;
5006}
5007
5008static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5009 int nents, enum dma_data_direction dir)
5010{
5011 struct scatterlist *sg;
5012 int i;
5013
5014 /* Cancel the device's base physical address of host memory */
5015 for_each_sg(sgl, sg, nents, i)
5016 sg->dma_address -= HOST_PHYS_BASE;
5017
5018 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5019}
5020
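/*
 * Compute the worst-case size of the patched CB for a user LIN_DMA packet:
 * physically contiguous SG entries are merged as long as the combined
 * length fits in DMA_MAX_TRANSFER_SIZE, and each resulting descriptor
 * becomes one packet_lin_dma.
 */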
5021static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5022 struct sg_table *sgt)
5023{
5024 struct scatterlist *sg, *sg_next_iter;
5025 u32 count, dma_desc_cnt;
5026 u64 len, len_next;
5027 dma_addr_t addr, addr_next;
5028
5029 dma_desc_cnt = 0;
5030
5031 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5032
5033 len = sg_dma_len(sg);
5034 addr = sg_dma_address(sg);
5035
5036 if (len == 0)
5037 break;
5038
5039 while ((count + 1) < sgt->nents) {
5040 sg_next_iter = sg_next(sg);
5041 len_next = sg_dma_len(sg_next_iter);
5042 addr_next = sg_dma_address(sg_next_iter);
5043
5044 if (len_next == 0)
5045 break;
5046
5047 if ((addr + len == addr_next) &&
5048 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5049 len += len_next;
5050 count++;
5051 sg = sg_next_iter;
5052 } else {
5053 break;
5054 }
5055 }
5056
5057 dma_desc_cnt++;
5058 }
5059
5060 return dma_desc_cnt * sizeof(struct packet_lin_dma);
5061}
5062
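/*
 * Pin the user buffer referenced by a LIN_DMA packet (unless it is already
 * pinned for this job), DMA-map its SG table and account for the
 * descriptors it will add to the patched CB.
 */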
5063static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5064 struct hl_cs_parser *parser,
5065 struct packet_lin_dma *user_dma_pkt,
5066 u64 addr, enum dma_data_direction dir)
5067{
5068 struct hl_userptr *userptr;
5069 int rc;
5070
5071 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5072 parser->job_userptr_list, &userptr))
5073 goto already_pinned;
5074
Ofir Bittond5eb8372021-02-14 15:35:56 +02005075 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005076 if (!userptr)
5077 return -ENOMEM;
5078
5079 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5080 userptr);
5081 if (rc)
5082 goto free_userptr;
5083
5084 list_add_tail(&userptr->job_node, parser->job_userptr_list);
5085
5086 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5087 userptr->sgt->nents, dir);
5088 if (rc) {
5089 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5090 goto unpin_memory;
5091 }
5092
5093 userptr->dma_mapped = true;
5094 userptr->dir = dir;
5095
5096already_pinned:
5097 parser->patched_cb_size +=
5098 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5099
5100 return 0;
5101
5102unpin_memory:
Koby Elbazf5eb7bf2021-06-09 21:43:52 +03005103 list_del(&userptr->job_node);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005104 hl_unpin_host_memory(hdev, userptr);
5105free_userptr:
5106 kfree(userptr);
5107 return rc;
5108}
5109
5110static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5111 struct hl_cs_parser *parser,
5112 struct packet_lin_dma *user_dma_pkt,
5113 bool src_in_host)
5114{
5115 enum dma_data_direction dir;
5116 bool skip_host_mem_pin = false, user_memset;
5117 u64 addr;
5118 int rc = 0;
5119
5120 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5121 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5122 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5123
5124 if (src_in_host) {
5125 if (user_memset)
5126 skip_host_mem_pin = true;
5127
5128 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5129 dir = DMA_TO_DEVICE;
5130 addr = le64_to_cpu(user_dma_pkt->src_addr);
5131 } else {
5132 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5133 dir = DMA_FROM_DEVICE;
5134 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5135 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5136 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5137 }
5138
5139 if (skip_host_mem_pin)
5140 parser->patched_cb_size += sizeof(*user_dma_pkt);
5141 else
5142 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5143 addr, dir);
5144
5145 return rc;
5146}
5147
5148static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5149 struct hl_cs_parser *parser,
5150 struct packet_lin_dma *user_dma_pkt)
5151{
5152 bool src_in_host = false;
5153 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5154 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5155 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5156
5157 dev_dbg(hdev->dev, "DMA packet details:\n");
5158 dev_dbg(hdev->dev, "source == 0x%llx\n",
5159 le64_to_cpu(user_dma_pkt->src_addr));
5160 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5161 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5162
5163 /*
5164 * Special handling for DMA with size 0. Bypass all validations
5165 * because no transactions will be done except for WR_COMP, which
5166 * is not a security issue
5167 */
5168 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5169 parser->patched_cb_size += sizeof(*user_dma_pkt);
5170 return 0;
5171 }
5172
5173 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5174 src_in_host = true;
5175
5176 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5177 src_in_host);
5178}
5179
Oded Gabbay64536ab2020-05-27 12:38:16 +03005180static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5181 struct hl_cs_parser *parser,
5182 struct packet_load_and_exe *user_pkt)
5183{
5184 u32 cfg;
5185
5186 cfg = le32_to_cpu(user_pkt->cfg);
5187
5188 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5189 dev_err(hdev->dev,
5190 "User not allowed to use Load and Execute\n");
5191 return -EPERM;
5192 }
5193
5194 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5195
5196 return 0;
5197}
5198
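/*
 * First parsing pass: walk the user CB packet by packet, reject packets
 * the user is not allowed to submit, and accumulate the size of the
 * patched CB that the second pass will produce.
 */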
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005199static int gaudi_validate_cb(struct hl_device *hdev,
5200 struct hl_cs_parser *parser, bool is_mmu)
5201{
5202 u32 cb_parsed_length = 0;
5203 int rc = 0;
5204
5205 parser->patched_cb_size = 0;
5206
5207	/* user_cb_size is greater than 0, so the loop always executes */
5208 while (cb_parsed_length < parser->user_cb_size) {
5209 enum packet_id pkt_id;
5210 u16 pkt_size;
5211 struct gaudi_packet *user_pkt;
5212
Arnd Bergmann82948e62020-10-26 17:08:06 +01005213 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005214
5215 pkt_id = (enum packet_id) (
5216 (le64_to_cpu(user_pkt->header) &
5217 PACKET_HEADER_PACKET_ID_MASK) >>
5218 PACKET_HEADER_PACKET_ID_SHIFT);
5219
Ofir Bittonbc75be22020-07-30 14:56:38 +03005220 if (!validate_packet_id(pkt_id)) {
5221 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5222 rc = -EINVAL;
5223 break;
5224 }
5225
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005226 pkt_size = gaudi_packet_sizes[pkt_id];
5227 cb_parsed_length += pkt_size;
5228 if (cb_parsed_length > parser->user_cb_size) {
5229 dev_err(hdev->dev,
5230 "packet 0x%x is out of CB boundary\n", pkt_id);
5231 rc = -EINVAL;
5232 break;
5233 }
5234
5235 switch (pkt_id) {
5236 case PACKET_MSG_PROT:
5237 dev_err(hdev->dev,
5238 "User not allowed to use MSG_PROT\n");
5239 rc = -EPERM;
5240 break;
5241
5242 case PACKET_CP_DMA:
5243 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5244 rc = -EPERM;
5245 break;
5246
5247 case PACKET_STOP:
5248 dev_err(hdev->dev, "User not allowed to use STOP\n");
5249 rc = -EPERM;
5250 break;
5251
Oded Gabbay2edc66e2020-07-03 19:28:54 +03005252 case PACKET_WREG_BULK:
5253 dev_err(hdev->dev,
5254 "User not allowed to use WREG_BULK\n");
5255 rc = -EPERM;
5256 break;
5257
Oded Gabbay64536ab2020-05-27 12:38:16 +03005258 case PACKET_LOAD_AND_EXE:
5259 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5260 (struct packet_load_and_exe *) user_pkt);
5261 break;
5262
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005263 case PACKET_LIN_DMA:
5264 parser->contains_dma_pkt = true;
5265 if (is_mmu)
5266 parser->patched_cb_size += pkt_size;
5267 else
5268 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5269 (struct packet_lin_dma *) user_pkt);
5270 break;
5271
5272 case PACKET_WREG_32:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005273 case PACKET_MSG_LONG:
5274 case PACKET_MSG_SHORT:
5275 case PACKET_REPEAT:
5276 case PACKET_FENCE:
5277 case PACKET_NOP:
5278 case PACKET_ARB_POINT:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005279 parser->patched_cb_size += pkt_size;
5280 break;
5281
5282 default:
5283 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5284 pkt_id);
5285 rc = -EINVAL;
5286 break;
5287 }
5288
5289 if (rc)
5290 break;
5291 }
5292
5293 /*
5294 * The new CB should have space at the end for two MSG_PROT packets:
5295 * 1. A packet that will act as a completion packet
5296 * 2. A packet that will generate MSI-X interrupt
5297 */
Ofir Bittonac6fdbf2020-12-03 16:59:28 +02005298 if (parser->completion)
5299 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005300
5301 return rc;
5302}
5303
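/*
 * Expand a user LIN_DMA packet that targets host memory into one LIN_DMA
 * packet per merged SG entry. The engine-barrier bit is kept only on the
 * first generated packet, and the user's WR_COMP setting is restored only
 * on the last one.
 */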
5304static int gaudi_patch_dma_packet(struct hl_device *hdev,
5305 struct hl_cs_parser *parser,
5306 struct packet_lin_dma *user_dma_pkt,
5307 struct packet_lin_dma *new_dma_pkt,
5308 u32 *new_dma_pkt_size)
5309{
5310 struct hl_userptr *userptr;
5311 struct scatterlist *sg, *sg_next_iter;
5312 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5313 u64 len, len_next;
5314 dma_addr_t dma_addr, dma_addr_next;
5315 u64 device_memory_addr, addr;
5316 enum dma_data_direction dir;
5317 struct sg_table *sgt;
5318 bool src_in_host = false;
5319 bool skip_host_mem_pin = false;
5320 bool user_memset;
5321
5322 ctl = le32_to_cpu(user_dma_pkt->ctl);
5323
5324 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5325 src_in_host = true;
5326
5327 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5328 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5329
5330 if (src_in_host) {
5331 addr = le64_to_cpu(user_dma_pkt->src_addr);
5332 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5333 dir = DMA_TO_DEVICE;
5334 if (user_memset)
5335 skip_host_mem_pin = true;
5336 } else {
5337 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5338 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5339 dir = DMA_FROM_DEVICE;
5340 }
5341
5342 if ((!skip_host_mem_pin) &&
5343 (!hl_userptr_is_pinned(hdev, addr,
5344 le32_to_cpu(user_dma_pkt->tsize),
5345 parser->job_userptr_list, &userptr))) {
5346 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5347 addr, user_dma_pkt->tsize);
5348 return -EFAULT;
5349 }
5350
5351 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5352 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5353 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5354 return 0;
5355 }
5356
5357 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5358
5359 sgt = userptr->sgt;
5360 dma_desc_cnt = 0;
5361
5362 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5363 len = sg_dma_len(sg);
5364 dma_addr = sg_dma_address(sg);
5365
5366 if (len == 0)
5367 break;
5368
5369 while ((count + 1) < sgt->nents) {
5370 sg_next_iter = sg_next(sg);
5371 len_next = sg_dma_len(sg_next_iter);
5372 dma_addr_next = sg_dma_address(sg_next_iter);
5373
5374 if (len_next == 0)
5375 break;
5376
5377 if ((dma_addr + len == dma_addr_next) &&
5378 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5379 len += len_next;
5380 count++;
5381 sg = sg_next_iter;
5382 } else {
5383 break;
5384 }
5385 }
5386
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005387 ctl = le32_to_cpu(user_dma_pkt->ctl);
5388 if (likely(dma_desc_cnt))
5389 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5390 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5391 new_dma_pkt->ctl = cpu_to_le32(ctl);
5392 new_dma_pkt->tsize = cpu_to_le32(len);
5393
5394 if (dir == DMA_TO_DEVICE) {
5395 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5396 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5397 } else {
5398 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5399 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5400 }
5401
5402 if (!user_memset)
5403 device_memory_addr += len;
5404 dma_desc_cnt++;
5405 new_dma_pkt++;
5406 }
5407
5408 if (!dma_desc_cnt) {
5409 dev_err(hdev->dev,
5410 "Error of 0 SG entries when patching DMA packet\n");
5411 return -EFAULT;
5412 }
5413
5414	/* Fix the last DMA packet - WR_COMP must be set as the user requested */
5415 new_dma_pkt--;
5416 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5417
5418 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5419
5420 return 0;
5421}
5422
5423static int gaudi_patch_cb(struct hl_device *hdev,
5424 struct hl_cs_parser *parser)
5425{
5426 u32 cb_parsed_length = 0;
5427 u32 cb_patched_cur_length = 0;
5428 int rc = 0;
5429
5430	/* user_cb_size is greater than 0, so the loop always executes */
5431 while (cb_parsed_length < parser->user_cb_size) {
5432 enum packet_id pkt_id;
5433 u16 pkt_size;
5434 u32 new_pkt_size = 0;
5435 struct gaudi_packet *user_pkt, *kernel_pkt;
5436
Arnd Bergmann82948e62020-10-26 17:08:06 +01005437 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5438 kernel_pkt = parser->patched_cb->kernel_address +
5439 cb_patched_cur_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005440
5441 pkt_id = (enum packet_id) (
5442 (le64_to_cpu(user_pkt->header) &
5443 PACKET_HEADER_PACKET_ID_MASK) >>
5444 PACKET_HEADER_PACKET_ID_SHIFT);
5445
Ofir Bittonbc75be22020-07-30 14:56:38 +03005446 if (!validate_packet_id(pkt_id)) {
5447 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5448 rc = -EINVAL;
5449 break;
5450 }
5451
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005452 pkt_size = gaudi_packet_sizes[pkt_id];
5453 cb_parsed_length += pkt_size;
5454 if (cb_parsed_length > parser->user_cb_size) {
5455 dev_err(hdev->dev,
5456 "packet 0x%x is out of CB boundary\n", pkt_id);
5457 rc = -EINVAL;
5458 break;
5459 }
5460
5461 switch (pkt_id) {
5462 case PACKET_LIN_DMA:
5463 rc = gaudi_patch_dma_packet(hdev, parser,
5464 (struct packet_lin_dma *) user_pkt,
5465 (struct packet_lin_dma *) kernel_pkt,
5466 &new_pkt_size);
5467 cb_patched_cur_length += new_pkt_size;
5468 break;
5469
5470 case PACKET_MSG_PROT:
5471 dev_err(hdev->dev,
5472 "User not allowed to use MSG_PROT\n");
5473 rc = -EPERM;
5474 break;
5475
5476 case PACKET_CP_DMA:
5477 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5478 rc = -EPERM;
5479 break;
5480
5481 case PACKET_STOP:
5482 dev_err(hdev->dev, "User not allowed to use STOP\n");
5483 rc = -EPERM;
5484 break;
5485
5486 case PACKET_WREG_32:
5487 case PACKET_WREG_BULK:
5488 case PACKET_MSG_LONG:
5489 case PACKET_MSG_SHORT:
5490 case PACKET_REPEAT:
5491 case PACKET_FENCE:
5492 case PACKET_NOP:
5493 case PACKET_ARB_POINT:
5494 case PACKET_LOAD_AND_EXE:
5495 memcpy(kernel_pkt, user_pkt, pkt_size);
5496 cb_patched_cur_length += pkt_size;
5497 break;
5498
5499 default:
5500 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5501 pkt_id);
5502 rc = -EINVAL;
5503 break;
5504 }
5505
5506 if (rc)
5507 break;
5508 }
5509
5510 return rc;
5511}
5512
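/*
 * MMU parsing path: allocate a kernel-owned patched CB sized for the extra
 * MSG_PROT packets, copy the user CB into it and run the validation pass
 * on the copy.
 */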
5513static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5514 struct hl_cs_parser *parser)
5515{
5516 u64 patched_cb_handle;
5517 u32 patched_cb_size;
5518 struct hl_cb *user_cb;
5519 int rc;
5520
5521 /*
5522	 * The new CB should have space at the end for two MSG_PROT packets:
5523 * 1. A packet that will act as a completion packet
5524 * 2. A packet that will generate MSI interrupt
5525 */
Ofir Bittonac6fdbf2020-12-03 16:59:28 +02005526 if (parser->completion)
5527 parser->patched_cb_size = parser->user_cb_size +
5528 sizeof(struct packet_msg_prot) * 2;
5529 else
5530 parser->patched_cb_size = parser->user_cb_size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005531
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005532 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005533 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005534 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005535
5536 if (rc) {
5537 dev_err(hdev->dev,
5538 "Failed to allocate patched CB for DMA CS %d\n",
5539 rc);
5540 return rc;
5541 }
5542
5543 patched_cb_handle >>= PAGE_SHIFT;
5544 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5545 (u32) patched_cb_handle);
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005546 /* hl_cb_get should never fail */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005547 if (!parser->patched_cb) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005548 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5549 (u32) patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005550 rc = -EFAULT;
5551 goto out;
5552 }
5553
5554 /*
5555 * The check that parser->user_cb_size <= parser->user_cb->size was done
5556 * in validate_queue_index().
5557 */
Arnd Bergmann82948e62020-10-26 17:08:06 +01005558 memcpy(parser->patched_cb->kernel_address,
5559 parser->user_cb->kernel_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005560 parser->user_cb_size);
5561
5562 patched_cb_size = parser->patched_cb_size;
5563
5564 /* Validate patched CB instead of user CB */
5565 user_cb = parser->user_cb;
5566 parser->user_cb = parser->patched_cb;
5567 rc = gaudi_validate_cb(hdev, parser, true);
5568 parser->user_cb = user_cb;
5569
5570 if (rc) {
5571 hl_cb_put(parser->patched_cb);
5572 goto out;
5573 }
5574
5575 if (patched_cb_size != parser->patched_cb_size) {
5576 dev_err(hdev->dev, "user CB size mismatch\n");
5577 hl_cb_put(parser->patched_cb);
5578 rc = -EINVAL;
5579 goto out;
5580 }
5581
5582out:
5583 /*
5584	 * Always call cb destroy here because we still hold one reference to
5585	 * the CB from the cb_get call above. After the job completes, cb_put
5586	 * will release it, but here we want to remove the CB from the
5587	 * IDR
5588 */
5589 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5590 patched_cb_handle << PAGE_SHIFT);
5591
5592 return rc;
5593}
5594
5595static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5596 struct hl_cs_parser *parser)
5597{
5598 u64 patched_cb_handle;
5599 int rc;
5600
5601 rc = gaudi_validate_cb(hdev, parser, false);
5602
5603 if (rc)
5604 goto free_userptr;
5605
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005606 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005607 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005608 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005609 if (rc) {
5610 dev_err(hdev->dev,
5611 "Failed to allocate patched CB for DMA CS %d\n", rc);
5612 goto free_userptr;
5613 }
5614
5615 patched_cb_handle >>= PAGE_SHIFT;
5616 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5617 (u32) patched_cb_handle);
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005618 /* hl_cb_get should never fail here */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005619 if (!parser->patched_cb) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005620 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5621 (u32) patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005622 rc = -EFAULT;
5623 goto out;
5624 }
5625
5626 rc = gaudi_patch_cb(hdev, parser);
5627
5628 if (rc)
5629 hl_cb_put(parser->patched_cb);
5630
5631out:
5632 /*
5633	 * Always call cb destroy here because we still hold one reference to
5634	 * the CB from the cb_get call above. After the job completes, cb_put
5635	 * will release it, but here we want to remove the CB from the
5636	 * IDR
5637 */
5638 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5639 patched_cb_handle << PAGE_SHIFT);
5640
5641free_userptr:
5642 if (rc)
5643 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5644 return rc;
5645}
5646
5647static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5648 struct hl_cs_parser *parser)
5649{
5650 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
Oded Gabbay3c681572020-11-02 21:10:39 +02005651 struct gaudi_device *gaudi = hdev->asic_specific;
5652 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5653 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5654
5655 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5656 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5657 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5658 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5659 parser->hw_queue_id);
5660 return -EINVAL;
5661 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005662
5663 /* For internal queue jobs just check if CB address is valid */
5664 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5665 parser->user_cb_size,
5666 asic_prop->sram_user_base_address,
5667 asic_prop->sram_end_address))
5668 return 0;
5669
5670 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5671 parser->user_cb_size,
5672 asic_prop->dram_user_base_address,
5673 asic_prop->dram_end_address))
5674 return 0;
5675
5676 /* PMMU and HPMMU addresses are equal, check only one of them */
5677 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5678 parser->user_cb_size,
5679 asic_prop->pmmu.start_addr,
5680 asic_prop->pmmu.end_addr))
5681 return 0;
5682
5683 dev_err(hdev->dev,
5684 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5685 parser->user_cb, parser->user_cb_size);
5686
5687 return -EFAULT;
5688}
5689
5690static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5691{
5692 struct gaudi_device *gaudi = hdev->asic_specific;
5693
5694 if (parser->queue_type == QUEUE_TYPE_INT)
5695 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5696
5697 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5698 return gaudi_parse_cb_mmu(hdev, parser);
5699 else
5700 return gaudi_parse_cb_no_mmu(hdev, parser);
5701}
5702
5703static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
Arnd Bergmann82948e62020-10-26 17:08:06 +01005704 void *kernel_address, u32 len,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005705 u64 cq_addr, u32 cq_val, u32 msi_vec,
5706 bool eb)
5707{
5708 struct gaudi_device *gaudi = hdev->asic_specific;
5709 struct packet_msg_prot *cq_pkt;
5710 u32 tmp;
5711
Arnd Bergmann82948e62020-10-26 17:08:06 +01005712 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005713
Oded Gabbay65887292020-08-12 11:21:01 +03005714 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5715 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005716
5717 if (eb)
Oded Gabbay65887292020-08-12 11:21:01 +03005718 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005719
5720 cq_pkt->ctl = cpu_to_le32(tmp);
5721 cq_pkt->value = cpu_to_le32(cq_val);
5722 cq_pkt->addr = cpu_to_le64(cq_addr);
5723
5724 cq_pkt++;
5725
Oded Gabbay65887292020-08-12 11:21:01 +03005726 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5727 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005728 cq_pkt->ctl = cpu_to_le32(tmp);
5729 cq_pkt->value = cpu_to_le32(1);
5730
5731 if (!gaudi->multi_msi_mode)
5732 msi_vec = 0;
5733
5734 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5735}
5736
5737static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5738{
5739 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5740}
5741
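/*
 * Memset a device memory range by building a single LIN_DMA packet with
 * the memset flag set and submitting it on DMA channel 0 through QMAN0,
 * checking the DMA error-cause register before and after the transfer.
 */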
5742static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5743 u32 size, u64 val)
5744{
5745 struct packet_lin_dma *lin_dma_pkt;
5746 struct hl_cs_job *job;
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005747 u32 cb_size, ctl, err_cause;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005748 struct hl_cb *cb;
Lv Yunlong115726c2021-04-26 06:43:46 -07005749 u64 id;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005750 int rc;
5751
Ofir Bittona04b7cd2020-07-13 13:36:55 +03005752 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005753 if (!cb)
5754 return -EFAULT;
5755
Arnd Bergmann82948e62020-10-26 17:08:06 +01005756 lin_dma_pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005757 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5758 cb_size = sizeof(*lin_dma_pkt);
5759
Oded Gabbay65887292020-08-12 11:21:01 +03005760 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5761 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5762 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5763 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5764 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5765
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005766 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5767 lin_dma_pkt->src_addr = cpu_to_le64(val);
5768 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5769 lin_dma_pkt->tsize = cpu_to_le32(size);
5770
5771 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5772 if (!job) {
5773 dev_err(hdev->dev, "Failed to allocate a new job\n");
5774 rc = -ENOMEM;
5775 goto release_cb;
5776 }
5777
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005778 /* Verify DMA is OK */
5779 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5780 if (err_cause && !hdev->init_done) {
5781 dev_dbg(hdev->dev,
5782 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5783 err_cause);
5784 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5785 }
5786
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005787 job->id = 0;
5788 job->user_cb = cb;
Tomer Tayarf07486742020-08-02 22:51:31 +03005789 atomic_inc(&job->user_cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005790 job->user_cb_size = cb_size;
5791 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5792 job->patched_cb = job->user_cb;
5793 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5794
5795 hl_debugfs_add_job(hdev, job);
5796
5797 rc = gaudi_send_job_on_qman0(hdev, job);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005798 hl_debugfs_remove_job(hdev, job);
5799 kfree(job);
Tomer Tayarf07486742020-08-02 22:51:31 +03005800 atomic_dec(&cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005801
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005802 /* Verify DMA is OK */
5803 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5804 if (err_cause) {
5805 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5806 rc = -EIO;
5807 if (!hdev->init_done) {
5808 dev_dbg(hdev->dev,
5809 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5810 err_cause);
5811 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5812 }
5813 }
5814
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005815release_cb:
Lv Yunlong115726c2021-04-26 06:43:46 -07005816 id = cb->id;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005817 hl_cb_put(cb);
Lv Yunlong115726c2021-04-26 06:43:46 -07005818 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005819
5820 return rc;
5821}
5822
Ofir Bitton423815b2021-01-05 09:04:07 +02005823static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5824 u32 num_regs, u32 val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005825{
Ofir Bitton423815b2021-01-05 09:04:07 +02005826 struct packet_msg_long *pkt;
5827 struct hl_cs_job *job;
5828 u32 cb_size, ctl;
5829 struct hl_cb *cb;
5830 int i, rc;
5831
5832 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5833
5834 if (cb_size > SZ_2M) {
5835 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5836 return -ENOMEM;
5837 }
5838
5839 cb = hl_cb_kernel_create(hdev, cb_size, false);
5840 if (!cb)
5841 return -EFAULT;
5842
5843 pkt = cb->kernel_address;
5844
5845 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5846 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5847 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5848 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5849 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5850
5851 for (i = 0; i < num_regs ; i++, pkt++) {
5852 pkt->ctl = cpu_to_le32(ctl);
5853 pkt->value = cpu_to_le32(val);
5854 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5855 }
5856
5857 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5858 if (!job) {
5859 dev_err(hdev->dev, "Failed to allocate a new job\n");
5860 rc = -ENOMEM;
5861 goto release_cb;
5862 }
5863
5864 job->id = 0;
5865 job->user_cb = cb;
5866 atomic_inc(&job->user_cb->cs_cnt);
5867 job->user_cb_size = cb_size;
5868 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5869 job->patched_cb = job->user_cb;
5870 job->job_cb_size = cb_size;
5871
5872 hl_debugfs_add_job(hdev, job);
5873
5874 rc = gaudi_send_job_on_qman0(hdev, job);
5875 hl_debugfs_remove_job(hdev, job);
5876 kfree(job);
5877 atomic_dec(&cb->cs_cnt);
5878
5879release_cb:
5880 hl_cb_put(cb);
5881 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5882
5883 return rc;
5884}
5885
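/*
 * Register memset through a H/W queue: if there is no live compute context
 * the registers are written directly; otherwise a CB of MSG_LONG packets
 * is prepared and queued on the context's pending-CB list, to be sent on
 * the given external queue.
 */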
5886static int gaudi_schedule_register_memset(struct hl_device *hdev,
5887 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5888{
farah kassabrie65448f2021-03-30 16:38:02 +03005889 struct hl_ctx *ctx;
Ofir Bitton423815b2021-01-05 09:04:07 +02005890 struct hl_pending_cb *pending_cb;
5891 struct packet_msg_long *pkt;
5892 u32 cb_size, ctl;
5893 struct hl_cb *cb;
farah kassabrie65448f2021-03-30 16:38:02 +03005894 int i, rc;
5895
5896 mutex_lock(&hdev->fpriv_list_lock);
5897 ctx = hdev->compute_ctx;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005898
Ofir Bitton423815b2021-01-05 09:04:07 +02005899	/* If no compute context is available, or the context is going down,
5900	 * memset the registers directly
5901 */
farah kassabrie65448f2021-03-30 16:38:02 +03005902 if (!ctx || kref_read(&ctx->refcount) == 0) {
5903 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5904 mutex_unlock(&hdev->fpriv_list_lock);
5905 return rc;
5906 }
5907
5908 mutex_unlock(&hdev->fpriv_list_lock);
Ofir Bitton423815b2021-01-05 09:04:07 +02005909
5910 cb_size = (sizeof(*pkt) * num_regs) +
5911 sizeof(struct packet_msg_prot) * 2;
5912
5913 if (cb_size > SZ_2M) {
5914		dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5915 return -ENOMEM;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005916 }
5917
Ofir Bitton423815b2021-01-05 09:04:07 +02005918 pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5919 if (!pending_cb)
5920 return -ENOMEM;
5921
5922 cb = hl_cb_kernel_create(hdev, cb_size, false);
5923 if (!cb) {
5924 kfree(pending_cb);
5925 return -EFAULT;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005926 }
5927
Ofir Bitton423815b2021-01-05 09:04:07 +02005928 pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005929
Ofir Bitton423815b2021-01-05 09:04:07 +02005930 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5931 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5932 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5933 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5934 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005935
Ofir Bitton423815b2021-01-05 09:04:07 +02005936 for (i = 0; i < num_regs ; i++, pkt++) {
5937 pkt->ctl = cpu_to_le32(ctl);
5938 pkt->value = cpu_to_le32(val);
5939 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5940 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005941
Ofir Bitton423815b2021-01-05 09:04:07 +02005942 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5943
5944 pending_cb->cb = cb;
5945 pending_cb->cb_size = cb_size;
5946 /* The queue ID MUST be an external queue ID. Otherwise, we will
5947 * have undefined behavior
5948 */
5949 pending_cb->hw_queue_id = hw_queue_id;
5950
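	/*
	 * The CB is not submitted here; it is queued on the context's pending
	 * CB list, to be flushed to the requested HW queue by the driver's
	 * command submission flow the next time this context submits work.
	 */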
5951 spin_lock(&ctx->pending_cb_lock);
5952 list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5953 spin_unlock(&ctx->pending_cb_lock);
5954
5955 return 0;
5956}
5957
5958static int gaudi_restore_sm_registers(struct hl_device *hdev)
5959{
5960 u64 base_addr;
5961 u32 num_regs;
5962 int rc;
5963
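	/*
	 * Zero the sync objects and monitor status registers of all four sync
	 * manager blocks. In the W_S block, the first objects and monitors are
	 * reserved for the driver, so only the remaining ones are cleared.
	 */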
5964 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5965 num_regs = NUM_OF_SOB_IN_BLOCK;
5966 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5967 if (rc) {
5968 dev_err(hdev->dev, "failed resetting SM registers");
5969 return -ENOMEM;
5970 }
5971
5972 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5973 num_regs = NUM_OF_SOB_IN_BLOCK;
5974 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5975 if (rc) {
5976 dev_err(hdev->dev, "failed resetting SM registers");
5977 return -ENOMEM;
5978 }
5979
5980 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5981 num_regs = NUM_OF_SOB_IN_BLOCK;
5982 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5983 if (rc) {
5984 dev_err(hdev->dev, "failed resetting SM registers");
5985 return -ENOMEM;
5986 }
5987
5988 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5989 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5990 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5991 if (rc) {
5992 dev_err(hdev->dev, "failed resetting SM registers");
5993 return -ENOMEM;
5994 }
5995
5996 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5997 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5998 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5999 if (rc) {
6000 dev_err(hdev->dev, "failed resetting SM registers");
6001 return -ENOMEM;
6002 }
6003
6004 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6005 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6006 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6007 if (rc) {
6008 dev_err(hdev->dev, "failed resetting SM registers");
6009 return -ENOMEM;
6010 }
6011
6012 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6013 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6014 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6015 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6016 if (rc) {
6017 dev_err(hdev->dev, "failed resetting SM registers");
6018 return -ENOMEM;
6019 }
6020
6021 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6022 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6023 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6024 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6025 if (rc) {
6026 dev_err(hdev->dev, "failed resetting SM registers");
6027 return -ENOMEM;
6028 }
6029
6030 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006031}
6032
6033static void gaudi_restore_dma_registers(struct hl_device *hdev)
6034{
6035 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6036 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6037 int i;
6038
6039 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6040 u64 sob_addr = CFG_BASE +
6041 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6042 (i * sob_delta);
6043 u32 dma_offset = i * DMA_CORE_OFFSET;
6044
6045 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6046 lower_32_bits(sob_addr));
6047 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6048 upper_32_bits(sob_addr));
6049 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6050
6051 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6052 * modified by the user for SRAM reduction
6053 */
6054 if (i > 1)
6055 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6056 0x00000001);
6057 }
6058}
6059
6060static void gaudi_restore_qm_registers(struct hl_device *hdev)
6061{
6062 u32 qman_offset;
6063 int i;
6064
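	/*
	 * Clear ARB_CFG_0 of every DMA, master MME and TPC QMAN (and of the
	 * NIC QMANs below) to restore the default arbitration configuration,
	 * which user submissions may have modified.
	 */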
6065 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6066 qman_offset = i * DMA_QMAN_OFFSET;
6067 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6068 }
6069
6070 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6071 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6072 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6073 }
6074
6075 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6076 qman_offset = i * TPC_QMAN_OFFSET;
6077 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6078 }
Oded Gabbay3c681572020-11-02 21:10:39 +02006079
6080 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6081 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6082 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6083 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6084 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006085}
6086
Ofir Bitton423815b2021-01-05 09:04:07 +02006087static int gaudi_restore_user_registers(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006088{
Ofir Bitton423815b2021-01-05 09:04:07 +02006089 int rc;
6090
6091 rc = gaudi_restore_sm_registers(hdev);
6092 if (rc)
6093 return rc;
6094
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006095 gaudi_restore_dma_registers(hdev);
6096 gaudi_restore_qm_registers(hdev);
Ofir Bitton423815b2021-01-05 09:04:07 +02006097
6098 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006099}
6100
6101static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6102{
Ofir Bitton423815b2021-01-05 09:04:07 +02006103 return gaudi_restore_user_registers(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006104}
6105
6106static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6107{
6108 struct asic_fixed_properties *prop = &hdev->asic_prop;
6109 struct gaudi_device *gaudi = hdev->asic_specific;
6110 u64 addr = prop->mmu_pgt_addr;
6111 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6112
6113 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6114 return 0;
6115
6116 return gaudi_memset_device_memory(hdev, addr, size, 0);
6117}
6118
6119static void gaudi_restore_phase_topology(struct hl_device *hdev)
6120{
6121
6122}
6123
Ofir Bittona5778d12021-02-24 11:51:40 +02006124static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6125 bool user_address, u32 *val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006126{
6127 struct asic_fixed_properties *prop = &hdev->asic_prop;
6128 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittona5778d12021-02-24 11:51:40 +02006129 u64 hbm_bar_addr, host_phys_end;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006130 int rc = 0;
6131
Ofir Bittona5778d12021-02-24 11:51:40 +02006132 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6133
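	/*
	 * Dispatch by address range: CFG space through register access (unless
	 * blocked by clock gating), SRAM through its PCI BAR, DRAM/HBM by
	 * sliding the HBM BAR, and host physical memory via phys_to_virt()
	 * when no IOMMU is present.
	 */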
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006134 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006135
6136 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6137 (hdev->clock_gating_mask &
6138 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6139
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006140 dev_err_ratelimited(hdev->dev,
6141 "Can't read register - clock gating is enabled!\n");
6142 rc = -EFAULT;
6143 } else {
6144 *val = RREG32(addr - CFG_BASE);
6145 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006146
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006147 } else if ((addr >= SRAM_BASE_ADDR) &&
6148 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6149 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6150 (addr - SRAM_BASE_ADDR));
6151 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6152 u64 bar_base_addr = DRAM_PHYS_BASE +
6153 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6154
6155 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6156 if (hbm_bar_addr != U64_MAX) {
6157 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6158 (addr - bar_base_addr));
6159
6160 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6161 hbm_bar_addr);
6162 }
6163 if (hbm_bar_addr == U64_MAX)
6164 rc = -EIO;
Ofir Bittona5778d12021-02-24 11:51:40 +02006165 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6166 user_address && !iommu_present(&pci_bus_type)) {
6167 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006168 } else {
6169 rc = -EFAULT;
6170 }
6171
6172 return rc;
6173}
6174
Ofir Bittona5778d12021-02-24 11:51:40 +02006175static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6176 bool user_address, u32 val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006177{
6178 struct asic_fixed_properties *prop = &hdev->asic_prop;
6179 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittona5778d12021-02-24 11:51:40 +02006180 u64 hbm_bar_addr, host_phys_end;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006181 int rc = 0;
6182
Ofir Bittona5778d12021-02-24 11:51:40 +02006183 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6184
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006185 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006186
6187 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6188 (hdev->clock_gating_mask &
6189 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6190
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006191 dev_err_ratelimited(hdev->dev,
6192 "Can't write register - clock gating is enabled!\n");
6193 rc = -EFAULT;
6194 } else {
6195 WREG32(addr - CFG_BASE, val);
6196 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006197
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006198 } else if ((addr >= SRAM_BASE_ADDR) &&
6199 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6200 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6201 (addr - SRAM_BASE_ADDR));
6202 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6203 u64 bar_base_addr = DRAM_PHYS_BASE +
6204 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6205
6206 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6207 if (hbm_bar_addr != U64_MAX) {
6208 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6209 (addr - bar_base_addr));
6210
6211 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6212 hbm_bar_addr);
6213 }
6214 if (hbm_bar_addr == U64_MAX)
6215 rc = -EIO;
Ofir Bittona5778d12021-02-24 11:51:40 +02006216 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6217 user_address && !iommu_present(&pci_bus_type)) {
6218 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006219 } else {
6220 rc = -EFAULT;
6221 }
6222
6223 return rc;
6224}
6225
Ofir Bittona5778d12021-02-24 11:51:40 +02006226static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6227 bool user_address, u64 *val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006228{
6229 struct asic_fixed_properties *prop = &hdev->asic_prop;
6230 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittona5778d12021-02-24 11:51:40 +02006231 u64 hbm_bar_addr, host_phys_end;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006232 int rc = 0;
6233
Ofir Bittona5778d12021-02-24 11:51:40 +02006234 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6235
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006236 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006237
6238 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6239 (hdev->clock_gating_mask &
6240 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6241
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006242 dev_err_ratelimited(hdev->dev,
6243 "Can't read register - clock gating is enabled!\n");
6244 rc = -EFAULT;
6245 } else {
6246 u32 val_l = RREG32(addr - CFG_BASE);
6247 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6248
6249 *val = (((u64) val_h) << 32) | val_l;
6250 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006251
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006252 } else if ((addr >= SRAM_BASE_ADDR) &&
6253 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6254 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6255 (addr - SRAM_BASE_ADDR));
6256 } else if (addr <=
6257 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6258 u64 bar_base_addr = DRAM_PHYS_BASE +
6259 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6260
6261 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6262 if (hbm_bar_addr != U64_MAX) {
6263 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6264 (addr - bar_base_addr));
6265
6266 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6267 hbm_bar_addr);
6268 }
6269 if (hbm_bar_addr == U64_MAX)
6270 rc = -EIO;
Ofir Bittona5778d12021-02-24 11:51:40 +02006271 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6272 user_address && !iommu_present(&pci_bus_type)) {
6273 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006274 } else {
6275 rc = -EFAULT;
6276 }
6277
6278 return rc;
6279}
6280
Ofir Bittona5778d12021-02-24 11:51:40 +02006281static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6282 bool user_address, u64 val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006283{
6284 struct asic_fixed_properties *prop = &hdev->asic_prop;
6285 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittona5778d12021-02-24 11:51:40 +02006286 u64 hbm_bar_addr, host_phys_end;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006287 int rc = 0;
6288
Ofir Bittona5778d12021-02-24 11:51:40 +02006289 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6290
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006291 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006292
6293 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6294 (hdev->clock_gating_mask &
6295 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6296
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006297 dev_err_ratelimited(hdev->dev,
6298 "Can't write register - clock gating is enabled!\n");
6299 rc = -EFAULT;
6300 } else {
6301 WREG32(addr - CFG_BASE, lower_32_bits(val));
6302 WREG32(addr + sizeof(u32) - CFG_BASE,
6303 upper_32_bits(val));
6304 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006305
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006306 } else if ((addr >= SRAM_BASE_ADDR) &&
6307 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6308 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6309 (addr - SRAM_BASE_ADDR));
6310 } else if (addr <=
6311 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6312 u64 bar_base_addr = DRAM_PHYS_BASE +
6313 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6314
6315 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6316 if (hbm_bar_addr != U64_MAX) {
6317 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6318 (addr - bar_base_addr));
6319
6320 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6321 hbm_bar_addr);
6322 }
6323 if (hbm_bar_addr == U64_MAX)
6324 rc = -EIO;
Ofir Bittona5778d12021-02-24 11:51:40 +02006325 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6326 user_address && !iommu_present(&pci_bus_type)) {
6327 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006328 } else {
6329 rc = -EFAULT;
6330 }
6331
6332 return rc;
6333}
6334
Oded Gabbay639781d2021-04-02 01:43:18 +03006335static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6336 u32 size_to_dma, dma_addr_t dma_addr)
6337{
6338 u32 err_cause, val;
6339 u64 dma_offset;
6340 int rc;
6341
6342 dma_offset = dma_id * DMA_CORE_OFFSET;
6343
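	/*
	 * Program a single linear transfer directly in the DMA core registers:
	 * source, destination and size, then commit with the LIN bit set to
	 * start the transfer.
	 */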
6344 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6345 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6346 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6347 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6348 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6349 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6350 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6351
6352 rc = hl_poll_timeout(
6353 hdev,
6354 mmDMA0_CORE_STS0 + dma_offset,
6355 val,
6356 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6357 0,
6358 1000000);
6359
6360 if (rc) {
6361 dev_err(hdev->dev,
6362 "DMA %d timed-out during reading of 0x%llx\n",
6363 dma_id, addr);
6364 return -EIO;
6365 }
6366
6367 /* Verify DMA is OK */
6368 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6369 if (err_cause) {
6370 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6371 dev_dbg(hdev->dev,
6372 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6373 err_cause);
6374 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6375
6376 return -EIO;
6377 }
6378
6379 return 0;
6380}
6381
6382static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6383 void *blob_addr)
6384{
6385 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6386 struct gaudi_device *gaudi = hdev->asic_specific;
6387 u64 dma_offset, qm_offset;
6388 dma_addr_t dma_addr;
6389 void *kernel_addr;
6390 bool is_eng_idle;
Colin Ian Kingb4e964f2021-04-12 17:10:12 +01006391 int rc = 0, dma_id;
Oded Gabbay639781d2021-04-02 01:43:18 +03006392
6393 kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6394 hdev, SZ_2M,
6395 &dma_addr,
6396 GFP_KERNEL | __GFP_ZERO);
6397
6398 if (!kernel_addr)
6399 return -ENOMEM;
6400
6401 mutex_lock(&gaudi->clk_gate_mutex);
6402
6403 hdev->asic_funcs->disable_clock_gating(hdev);
6404
6405 hdev->asic_funcs->hw_queues_lock(hdev);
6406
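	/* Prefer PCI DMA 1; if it is busy, fall back to PCI DMA 2 below */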
6407 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6408 dma_offset = dma_id * DMA_CORE_OFFSET;
6409 qm_offset = dma_id * DMA_QMAN_OFFSET;
6410 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6411 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6412
6413 if (!is_eng_idle) {
6414 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6415 dma_offset = dma_id * DMA_CORE_OFFSET;
6416 qm_offset = dma_id * DMA_QMAN_OFFSET;
6417 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6418 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6419
6420 if (!is_eng_idle) {
6421 dev_err_ratelimited(hdev->dev,
6422 "Can't read via DMA because it is BUSY\n");
6423 rc = -EAGAIN;
6424 goto out;
6425 }
6426 }
6427
6428 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6429 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6430 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6431
6432 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6433	 * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6434 * ASID
6435 */
6436 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6437
6438 /* Verify DMA is OK */
6439 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6440 if (err_cause) {
6441 dev_dbg(hdev->dev,
6442 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6443 err_cause);
6444 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6445 }
6446
6447 pos = 0;
6448 size_left = size;
6449 size_to_dma = SZ_2M;
6450
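	/*
	 * Transfer the region in chunks of up to 2MB: the DMA engine copies
	 * each chunk into the temporary coherent buffer, which is then
	 * memcpy'd into the caller's blob buffer.
	 */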
6451 while (size_left > 0) {
6452
6453 if (size_left < SZ_2M)
6454 size_to_dma = size_left;
6455
6456 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6457 dma_addr);
6458 if (rc)
6459 break;
6460
6461 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6462
6463 if (size_left <= SZ_2M)
6464 break;
6465
6466 pos += SZ_2M;
6467 addr += SZ_2M;
6468 size_left -= SZ_2M;
6469 }
6470
6471 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6472	 * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6473 * ASID
6474 */
6475 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6476 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6477
6478 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6479
6480out:
6481 hdev->asic_funcs->hw_queues_unlock(hdev);
6482
6483 hdev->asic_funcs->set_clock_gating(hdev);
6484
6485 mutex_unlock(&gaudi->clk_gate_mutex);
6486
6487 hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6488 dma_addr);
6489
6490 return rc;
6491}
6492
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006493static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6494{
6495 struct gaudi_device *gaudi = hdev->asic_specific;
6496
6497 if (hdev->hard_reset_pending)
6498 return U64_MAX;
6499
6500 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6501 (addr - gaudi->hbm_bar_cur_addr));
6502}
6503
6504static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6505{
6506 struct gaudi_device *gaudi = hdev->asic_specific;
6507
6508 if (hdev->hard_reset_pending)
6509 return;
6510
6511 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6512 (addr - gaudi->hbm_bar_cur_addr));
6513}
6514
Ofir Bitton1137e1e2020-09-30 18:43:52 +03006515void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006516{
6517	/* clear the MMBP (MMU bypass) bit and the ASID field, then set the new ASID */
6518 WREG32_AND(reg, ~0x7FF);
6519 WREG32_OR(reg, asid);
6520}
6521
6522static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6523{
6524 struct gaudi_device *gaudi = hdev->asic_specific;
6525
6526 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6527 return;
6528
6529 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02006530 dev_crit(hdev->dev, "asid %u is too big\n", asid);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006531 return;
6532 }
6533
6534 mutex_lock(&gaudi->clk_gate_mutex);
6535
6536 hdev->asic_funcs->disable_clock_gating(hdev);
6537
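	/*
	 * Program the ASID into the non-secure properties / AXI user registers
	 * of all DMA, TPC and MME engines (and of the initialized NIC engines
	 * below), so that transactions they issue are translated by the MMU
	 * under this context's ASID.
	 */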
6538 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6539 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6540 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6541 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6542 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6543
6544 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6545 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6546 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6547 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6548 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6549
6550 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6551 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6552 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6553 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6554 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6555
6556 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6557 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6558 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6559 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6560 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6561
6562 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6563 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6564 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6565 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6566 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6567
6568 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6569 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6570 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6571 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6572 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6573
6574 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6575 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6576 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6577 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6578 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6579
6580 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6581 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6582 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6583 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6584 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6585
6586 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6587 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6588 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6589 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6590 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6591 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6592 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6593 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6594
6595 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6596 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6597 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6598 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6599 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6600 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6601 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6602
6603 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6604 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6605 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6606 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6607 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6608 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6609 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6610
6611 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6612 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6613 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6614 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6615 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6616 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6617 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6618
6619 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6620 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6621 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6622 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6623 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6624 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6625 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6626
6627 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6628 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6629 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6630 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6631 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6632 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6633 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6634
6635 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6636 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6637 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6638 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6639 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6640 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6641 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6642
6643 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6644 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6645 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6646 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6647 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6648 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6649 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6650
6651 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6652 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6653 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6654 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6655 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6656 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6657 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6658
6659 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6660 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6661 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6662 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6663 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6664 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6665 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6666 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6667 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6668 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6669
6670 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6671 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6672 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6673 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6674 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6675 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6676 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6677 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6678 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6679 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6680 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6681 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6682
Oded Gabbay90810212021-05-25 21:35:13 +03006683 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006684 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6685 asid);
6686 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6687 asid);
6688 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6689 asid);
6690 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6691 asid);
6692 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6693 asid);
6694 }
6695
Oded Gabbay90810212021-05-25 21:35:13 +03006696 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006697 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6698 asid);
6699 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6700 asid);
6701 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6702 asid);
6703 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6704 asid);
6705 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6706 asid);
6707 }
6708
Oded Gabbay90810212021-05-25 21:35:13 +03006709 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006710 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6711 asid);
6712 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6713 asid);
6714 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6715 asid);
6716 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6717 asid);
6718 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6719 asid);
6720 }
6721
Oded Gabbay90810212021-05-25 21:35:13 +03006722 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006723 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6724 asid);
6725 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6726 asid);
6727 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6728 asid);
6729 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6730 asid);
6731 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6732 asid);
6733 }
6734
Oded Gabbay90810212021-05-25 21:35:13 +03006735 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006736 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6737 asid);
6738 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6739 asid);
6740 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6741 asid);
6742 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6743 asid);
6744 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6745 asid);
6746 }
6747
Oded Gabbay90810212021-05-25 21:35:13 +03006748 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006749 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6750 asid);
6751 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6752 asid);
6753 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6754 asid);
6755 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6756 asid);
6757 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6758 asid);
6759 }
6760
Oded Gabbay90810212021-05-25 21:35:13 +03006761 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006762 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6763 asid);
6764 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6765 asid);
6766 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6767 asid);
6768 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6769 asid);
6770 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6771 asid);
6772 }
6773
Oded Gabbay90810212021-05-25 21:35:13 +03006774 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006775 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6776 asid);
6777 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6778 asid);
6779 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6780 asid);
6781 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6782 asid);
6783 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6784 asid);
6785 }
6786
Oded Gabbay90810212021-05-25 21:35:13 +03006787 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006788 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6789 asid);
6790 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6791 asid);
6792 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6793 asid);
6794 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6795 asid);
6796 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6797 asid);
6798 }
6799
Oded Gabbay90810212021-05-25 21:35:13 +03006800 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
Oded Gabbay3c681572020-11-02 21:10:39 +02006801 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6802 asid);
6803 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6804 asid);
6805 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6806 asid);
6807 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6808 asid);
6809 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6810 asid);
6811 }
6812
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006813 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006814
6815 mutex_unlock(&gaudi->clk_gate_mutex);
6816}
6817
6818static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6819 struct hl_cs_job *job)
6820{
6821 struct packet_msg_prot *fence_pkt;
6822 u32 *fence_ptr;
6823 dma_addr_t fence_dma_addr;
6824 struct hl_cb *cb;
6825 u32 tmp, timeout, dma_offset;
6826 int rc;
6827
6828 if (hdev->pldm)
6829 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6830 else
6831 timeout = HL_DEVICE_TIMEOUT_USEC;
6832
Ohad Sharabicf303392021-01-17 16:01:56 +02006833 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006834 dev_err_ratelimited(hdev->dev,
6835 "Can't send driver job on QMAN0 because the device is not idle\n");
6836 return -EBUSY;
6837 }
6838
6839 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6840 &fence_dma_addr);
6841 if (!fence_ptr) {
6842 dev_err(hdev->dev,
6843 "Failed to allocate fence memory for QMAN0\n");
6844 return -ENOMEM;
6845 }
6846
6847 cb = job->patched_cb;
6848
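	/*
	 * The last packet_msg_prot slot of the patched CB is used as a fence:
	 * when QMAN0 executes it, it writes GAUDI_QMAN0_FENCE_VAL to the
	 * coherent fence buffer allocated above, and the driver polls that
	 * buffer below to detect job completion.
	 */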
Arnd Bergmann82948e62020-10-26 17:08:06 +01006849 fence_pkt = cb->kernel_address +
6850 job->job_cb_size - sizeof(struct packet_msg_prot);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006851
Oded Gabbay65887292020-08-12 11:21:01 +03006852 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6853 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6854 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6855
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006856 fence_pkt->ctl = cpu_to_le32(tmp);
6857 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6858 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6859
6860 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6861
6862 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6863
6864 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6865 job->job_cb_size, cb->bus_address);
6866 if (rc) {
6867 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6868 goto free_fence_ptr;
6869 }
6870
6871 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6872 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6873 timeout, true);
6874
6875 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6876
6877 if (rc == -ETIMEDOUT) {
6878 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6879 goto free_fence_ptr;
6880 }
6881
6882free_fence_ptr:
6883 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6884 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6885
6886 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6887 fence_dma_addr);
6888 return rc;
6889}
6890
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006891static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6892{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006893 if (event_type >= GAUDI_EVENT_SIZE)
6894 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006895
Ofir Bittonebd8d122020-05-10 13:41:28 +03006896 if (!gaudi_irq_map_table[event_type].valid)
6897 goto event_not_supported;
6898
6899	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6900
6901 return;
6902
6903event_not_supported:
6904 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006905}
6906
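/*
 * A RAZWI (read-as-zero / write-ignored) event is reported when an engine
 * issues a transaction to an illegal address. Each DMA IF router is shared
 * by two DMA engines, so the per-engine error cause registers are checked
 * to narrow down the initiator; if neither (or both) report the error, a
 * "DMAx or DMAy" string is returned.
 */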
6907static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6908 u32 x_y, bool is_write)
6909{
6910 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6911
6912 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6913 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6914
6915 switch (x_y) {
6916 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6917 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6918 dma_id[0] = 0;
6919 dma_id[1] = 2;
6920 break;
6921 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6922 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6923 dma_id[0] = 1;
6924 dma_id[1] = 3;
6925 break;
6926 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6927 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6928 dma_id[0] = 4;
6929 dma_id[1] = 6;
6930 break;
6931 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6932 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6933 dma_id[0] = 5;
6934 dma_id[1] = 7;
6935 break;
6936 default:
6937 goto unknown_initiator;
6938 }
6939
6940 for (i = 0 ; i < 2 ; i++) {
6941 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6942 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6943 }
6944
6945 switch (x_y) {
6946 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6947 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6948 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6949 return "DMA0";
6950 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6951 return "DMA2";
6952 else
6953 return "DMA0 or DMA2";
6954 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6955 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6956 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6957 return "DMA1";
6958 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6959 return "DMA3";
6960 else
6961 return "DMA1 or DMA3";
6962 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6963 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6964 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6965 return "DMA4";
6966 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6967 return "DMA6";
6968 else
6969 return "DMA4 or DMA6";
6970 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6971 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6972 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6973 return "DMA5";
6974 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6975 return "DMA7";
6976 else
6977 return "DMA5 or DMA7";
6978 }
6979
6980unknown_initiator:
6981 return "unknown initiator";
6982}
6983
6984static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6985 bool is_write)
6986{
6987 u32 val, x_y, axi_id;
6988
6989 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6990 RREG32(mmMMU_UP_RAZWI_READ_ID);
6991 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6992 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6993 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6994 RAZWI_INITIATOR_AXI_ID_SHIFT);
6995
6996 switch (x_y) {
6997 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6998 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6999 return "TPC0";
7000 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7001 return "NIC0";
7002 break;
7003 case RAZWI_INITIATOR_ID_X_Y_TPC1:
7004 return "TPC1";
7005 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7006 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7007 return "MME0";
7008 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7009 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7010 return "MME1";
7011 case RAZWI_INITIATOR_ID_X_Y_TPC2:
7012 return "TPC2";
7013 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7014 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7015 return "TPC3";
7016 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7017 return "PCI";
7018 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7019 return "CPU";
7020 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7021 return "PSOC";
7022 break;
7023 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7024 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7025 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7026 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7027 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7028 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7029 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7030 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7031 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7032 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7033 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7034 return "TPC4";
7035 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7036 return "NIC1";
7037 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7038 return "NIC2";
7039 break;
7040 case RAZWI_INITIATOR_ID_X_Y_TPC5:
7041 return "TPC5";
7042 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7043 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7044 return "MME2";
7045 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7046 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7047 return "MME3";
7048 case RAZWI_INITIATOR_ID_X_Y_TPC6:
7049 return "TPC6";
7050 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7051 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7052 return "TPC7";
7053 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7054 return "NIC4";
7055 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7056 return "NIC5";
7057 break;
7058 default:
7059 break;
7060 }
7061
7062 dev_err(hdev->dev,
7063 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7064 val,
7065 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7066 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7067 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7068 RAZWI_INITIATOR_AXI_ID_MASK);
7069
7070 return "unknown initiator";
7071}
7072
7073static void gaudi_print_razwi_info(struct hl_device *hdev)
7074{
7075 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7076 dev_err_ratelimited(hdev->dev,
7077 "RAZWI event caused by illegal write of %s\n",
7078 gaudi_get_razwi_initiator_name(hdev, true));
7079 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7080 }
7081
7082 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7083 dev_err_ratelimited(hdev->dev,
7084 "RAZWI event caused by illegal read of %s\n",
7085 gaudi_get_razwi_initiator_name(hdev, false));
7086 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7087 }
7088}
7089
7090static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7091{
7092 struct gaudi_device *gaudi = hdev->asic_specific;
7093 u64 addr;
7094 u32 val;
7095
7096 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7097 return;
7098
7099 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7100 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7101 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7102 addr <<= 32;
7103 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7104
7105 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7106 addr);
7107
7108 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7109 }
7110
7111 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7112 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7113 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7114 addr <<= 32;
7115 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7116
7117 dev_err_ratelimited(hdev->dev,
7118 "MMU access error on va 0x%llx\n", addr);
7119
7120 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7121 }
7122}
7123
7124/*
7125 * +-------------------+------------------------------------------------------+
7126 * | Configuration Reg | Description |
7127 * | Address | |
7128 * +-------------------+------------------------------------------------------+
7129 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
7130 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
7131 * | |0xF34 memory wrappers 63:32 |
7132 * | |0xF38 memory wrappers 95:64 |
7133 * | |0xF3C memory wrappers 127:96 |
7134 * +-------------------+------------------------------------------------------+
7135 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
7136 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
7137 * | |0xF44 memory wrappers 63:32 |
7138 * | |0xF48 memory wrappers 95:64 |
7139 * | |0xF4C memory wrappers 127:96 |
7140 * +-------------------+------------------------------------------------------+
7141 */
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007142static int gaudi_extract_ecc_info(struct hl_device *hdev,
7143 struct ecc_info_extract_params *params, u64 *ecc_address,
7144 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007145{
7146 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007147 u32 i, num_mem_regs, reg, err_bit;
7148 u64 err_addr, err_word = 0;
7149 int rc = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007150
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007151 num_mem_regs = params->num_memories / 32 +
7152 ((params->num_memories % 32) ? 1 : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007153
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007154 if (params->block_address >= CFG_BASE)
7155 params->block_address -= CFG_BASE;
7156
7157 if (params->derr)
7158 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007159 else
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007160 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007161
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007162 if (params->disable_clock_gating) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007163 mutex_lock(&gaudi->clk_gate_mutex);
7164 hdev->asic_funcs->disable_clock_gating(hdev);
7165 }
7166
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007167 /* Set invalid wrapper index */
7168 *memory_wrapper_idx = 0xFF;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007169
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007170 /* Iterate through memory wrappers, a single bit must be set */
Dan Carpenterb0353542020-08-05 12:51:05 +03007171 for (i = 0 ; i < num_mem_regs ; i++) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007172		/* read wrapper status word i at its own offset (do not accumulate) */
7173		err_word = RREG32(err_addr + i * 4);
7174 if (err_word) {
7175 err_bit = __ffs(err_word);
7176 *memory_wrapper_idx = err_bit + (32 * i);
7177 break;
7178 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007179 }
7180
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007181 if (*memory_wrapper_idx == 0xFF) {
7182 dev_err(hdev->dev, "ECC error information cannot be found\n");
7183 rc = -EINVAL;
7184 goto enable_clk_gate;
7185 }
7186
7187 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7188 *memory_wrapper_idx);
7189
7190 *ecc_address =
7191 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7192 *ecc_syndrom =
7193 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7194
7195 /* Clear error indication */
7196 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7197 if (params->derr)
7198 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7199 else
7200 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7201
7202 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7203
7204enable_clk_gate:
7205 if (params->disable_clock_gating) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007206 hdev->asic_funcs->set_clock_gating(hdev);
Greg Kroah-Hartman65a9bde62020-07-27 11:49:37 +02007207
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007208 mutex_unlock(&gaudi->clk_gate_mutex);
7209 }
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007210
7211 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007212}
7213
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007214/*
7215 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7216 *
7217 * @idx: the current pi/ci value
7218 * @q_len: the queue length (power of 2)
7219 *
7220 * @return the cyclically decremented index
7221 */
7222static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7223{
7224 u32 mask = q_len - 1;
7225
7226 /*
7227	 * modular decrement is equivalent to adding (q_len - 1);
7228	 * we then take the LSBs to make sure the value is in the
7229	 * range [0, q_len - 1]
7230 */
7231 return (idx + q_len - 1) & mask;
7232}
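/*
 * Example: with q_len = 8 (mask = 7), idx = 5 decrements to
 * (5 + 8 - 1) & 7 = 4, and idx = 0 wraps around to (0 + 8 - 1) & 7 = 7.
 */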
7233
7234/**
7235 * gaudi_print_sw_config_stream_data - print SW config stream data
7236 *
7237 * @hdev: pointer to the habanalabs device structure
7238 * @stream: the QMAN's stream
7239 * @qman_base: base address of QMAN registers block
7240 */
7241static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7242 u64 qman_base)
7243{
7244 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7245 u32 cq_ptr_lo_off, size;
7246
7247 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7248
7249 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7250 stream * cq_ptr_lo_off;
7251 cq_ptr_hi = cq_ptr_lo +
7252 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7253 cq_tsize = cq_ptr_lo +
7254 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7255
7256 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7257 size = RREG32(cq_tsize);
7258 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7259 stream, cq_ptr, size);
7260}
7261
7262/**
7263 * gaudi_print_last_pqes_on_err - print last PQEs on error
7264 *
7265 * @hdev: pointer to the habanalabs device structure
7266 * @qid_base: first QID of the QMAN (out of 4 streams)
7267 * @stream: the QMAN's stream
7268 * @qman_base: base address of QMAN registers block
7269 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7270 */
7271static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7272 u32 stream, u64 qman_base,
7273 bool pr_sw_conf)
7274{
7275 u32 ci, qm_ci_stream_off, queue_len;
7276 struct hl_hw_queue *q;
7277 u64 pq_ci;
7278 int i;
7279
7280 q = &hdev->kernel_queues[qid_base + stream];
7281
7282 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7283 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7284 stream * qm_ci_stream_off;
7285
7286 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7287 q->int_queue_len : HL_QUEUE_LENGTH;
7288
7289 hdev->asic_funcs->hw_queues_lock(hdev);
7290
7291 if (pr_sw_conf)
7292 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7293
7294 ci = RREG32(pq_ci);
7295
7296	/* we should start printing from ci - 1 */
7297 ci = gaudi_queue_idx_dec(ci, queue_len);
7298
7299 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7300 struct hl_bd *bd;
7301 u64 addr;
7302 u32 len;
7303
7304 bd = q->kernel_address;
7305 bd += ci;
7306
7307 len = le32_to_cpu(bd->len);
7308		/* len 0 means an uninitialized entry - break */
7309 if (!len)
7310 break;
7311
7312 addr = le64_to_cpu(bd->ptr);
7313
7314 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7315 stream, ci, addr, len);
7316
7317 /* get previous ci, wrap if needed */
7318 ci = gaudi_queue_idx_dec(ci, queue_len);
7319 }
7320
7321 hdev->asic_funcs->hw_queues_unlock(hdev);
7322}
7323
7324/**
7325 * print_qman_data_on_err - extract QMAN data on error
7326 *
7327 * @hdev: pointer to the habanalabs device structure
7328 * @qid_base: first QID of the QMAN (out of 4 streams)
7329 * @stream: the QMAN's stream
7330 * @qman_base: base address of QMAN registers block
7331 *
7332 * This function attempts to extract as much data as possible on a QMAN error.
7333 * For an upper CP, print the SW config stream data and the last 8 PQEs.
7334 * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7335 */
7336static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7337 u32 stream, u64 qman_base)
7338{
7339 u32 i;
7340
7341 if (stream != QMAN_STREAMS) {
7342 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7343 true);
7344 return;
7345 }
7346
7347 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7348
7349 for (i = 0; i < QMAN_STREAMS; i++)
7350 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7351 false);
7352}
7353
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007354static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7355 const char *qm_name,
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007356 u64 qman_base,
7357 u32 qid_base)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007358{
7359 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007360 u64 glbl_sts_addr, arb_err_addr;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007361 char reg_desc[32];
7362
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007363 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7364 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7365
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007366 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7367 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7368 glbl_sts_clr_val = 0;
7369 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7370
7371 if (!glbl_sts_val)
7372 continue;
7373
7374 if (i == QMAN_STREAMS)
7375 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7376 else
7377 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7378
7379 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7380 if (glbl_sts_val & BIT(j)) {
7381 dev_err_ratelimited(hdev->dev,
7382 "%s %s. err cause: %s\n",
7383 qm_name, reg_desc,
7384 gaudi_qman_error_cause[j]);
7385 glbl_sts_clr_val |= BIT(j);
7386 }
7387 }
7388
7389 /* Write 1 to clear errors */
Tomer Tayar1b497152021-04-06 13:32:20 +03007390 if (!hdev->stop_on_err)
7391 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007392 else
7393 print_qman_data_on_err(hdev, qid_base, i, qman_base);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007394 }
7395
7396 arb_err_val = RREG32(arb_err_addr);
7397
7398 if (!arb_err_val)
7399 return;
7400
7401 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7402 if (arb_err_val & BIT(j)) {
7403 dev_err_ratelimited(hdev->dev,
7404 "%s ARB_ERR. err cause: %s\n",
7405 qm_name,
7406 gaudi_qman_arb_error_cause[j]);
7407 }
7408 }
7409}
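/*
 * Illustrative sketch, not part of the driver: GLBL_STS1 is treated above as
 * a write-1-to-clear (W1C) register, so only the bits that were found set are
 * written back.  The helper below shows the same pattern in isolation; its
 * name is a placeholder.
 */
static void example_clear_w1c_status(struct hl_device *hdev, u32 sts_addr)
{
	u32 sts = RREG32(sts_addr);

	/* writing back the set bits clears them, zero bits are left untouched */
	if (sts)
		WREG32(sts_addr, sts);
}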
7410
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007411static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7412 struct hl_eq_sm_sei_data *sei_data)
7413{
7414 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7415
7416 switch (sei_data->sei_cause) {
Oded Gabbay78385042021-01-26 22:56:56 +02007417 case SM_SEI_SO_OVERFLOW:
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007418 dev_err(hdev->dev,
7419 "SM %u SEI Error: SO %u overflow/underflow",
Oded Gabbay78385042021-01-26 22:56:56 +02007420 index, le32_to_cpu(sei_data->sei_log));
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007421 break;
Oded Gabbay78385042021-01-26 22:56:56 +02007422 case SM_SEI_LBW_4B_UNALIGNED:
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007423 dev_err(hdev->dev,
7424 "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
Oded Gabbay78385042021-01-26 22:56:56 +02007425 index, le32_to_cpu(sei_data->sei_log));
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007426 break;
Oded Gabbay78385042021-01-26 22:56:56 +02007427 case SM_SEI_AXI_RESPONSE_ERR:
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007428 dev_err(hdev->dev,
7429 "SM %u SEI Error: AXI ID %u response error",
Oded Gabbay78385042021-01-26 22:56:56 +02007430 index, le32_to_cpu(sei_data->sei_log));
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007431 break;
7432 default:
7433 dev_err(hdev->dev, "Unknown SM SEI cause %u",
Oded Gabbay78385042021-01-26 22:56:56 +02007434 le32_to_cpu(sei_data->sei_log));
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007435 break;
7436 }
7437}
7438
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007439static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7440 struct hl_eq_ecc_data *ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007441{
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007442 struct ecc_info_extract_params params;
7443 u64 ecc_address = 0, ecc_syndrom = 0;
7444 u8 index, memory_wrapper_idx = 0;
7445 bool extract_info_from_fw;
7446 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007447
7448 switch (event_type) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007449 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7450 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7451 extract_info_from_fw = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007452 break;
7453 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7454 index = event_type - GAUDI_EVENT_TPC0_SERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007455 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7456 params.num_memories = 90;
7457 params.derr = false;
7458 params.disable_clock_gating = true;
7459 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007460 break;
7461 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7462 index = event_type - GAUDI_EVENT_TPC0_DERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007463 params.block_address =
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007464 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007465 params.num_memories = 90;
7466 params.derr = true;
7467 params.disable_clock_gating = true;
7468 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007469 break;
7470 case GAUDI_EVENT_MME0_ACC_SERR:
7471 case GAUDI_EVENT_MME1_ACC_SERR:
7472 case GAUDI_EVENT_MME2_ACC_SERR:
7473 case GAUDI_EVENT_MME3_ACC_SERR:
7474 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007475 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7476 params.num_memories = 128;
7477 params.derr = false;
7478 params.disable_clock_gating = true;
7479 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007480 break;
7481 case GAUDI_EVENT_MME0_ACC_DERR:
7482 case GAUDI_EVENT_MME1_ACC_DERR:
7483 case GAUDI_EVENT_MME2_ACC_DERR:
7484 case GAUDI_EVENT_MME3_ACC_DERR:
7485 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007486 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7487 params.num_memories = 128;
7488 params.derr = true;
7489 params.disable_clock_gating = true;
7490 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007491 break;
7492 case GAUDI_EVENT_MME0_SBAB_SERR:
7493 case GAUDI_EVENT_MME1_SBAB_SERR:
7494 case GAUDI_EVENT_MME2_SBAB_SERR:
7495 case GAUDI_EVENT_MME3_SBAB_SERR:
7496 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007497 params.block_address =
7498 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7499 params.num_memories = 33;
7500 params.derr = false;
7501 params.disable_clock_gating = true;
7502 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007503 break;
7504 case GAUDI_EVENT_MME0_SBAB_DERR:
7505 case GAUDI_EVENT_MME1_SBAB_DERR:
7506 case GAUDI_EVENT_MME2_SBAB_DERR:
7507 case GAUDI_EVENT_MME3_SBAB_DERR:
7508 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007509 params.block_address =
7510 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7511 params.num_memories = 33;
7512 params.derr = true;
7513 params.disable_clock_gating = true;
Oded Gabbay652b4442020-11-21 14:35:35 +02007514 extract_info_from_fw = false;
7515 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007516 default:
7517 return;
7518 }
7519
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007520 if (extract_info_from_fw) {
7521 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7522 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7523 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7524 } else {
7525 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7526 &ecc_syndrom, &memory_wrapper_idx);
7527 if (rc)
7528 return;
7529 }
7530
7531 dev_err(hdev->dev,
7532 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7533 ecc_address, ecc_syndrom, memory_wrapper_idx);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007534}
7535
7536static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7537{
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007538 u64 qman_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007539 char desc[32];
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007540 u32 qid_base;
7541 u8 index;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007542
7543 switch (event_type) {
7544 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7545 index = event_type - GAUDI_EVENT_TPC0_QM;
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007546 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7547 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007548 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7549 break;
7550 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7551 index = event_type - GAUDI_EVENT_MME0_QM;
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007552 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7553 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007554 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7555 break;
7556 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7557 index = event_type - GAUDI_EVENT_DMA0_QM;
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007558 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7559 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7560 if (index > 1)
7561 qid_base++;
7562 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007563 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7564 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02007565 case GAUDI_EVENT_NIC0_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007566 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7567 qman_base = mmNIC0_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007568 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7569 break;
7570 case GAUDI_EVENT_NIC0_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007571 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7572 qman_base = mmNIC0_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007573 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7574 break;
7575 case GAUDI_EVENT_NIC1_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007576 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7577 qman_base = mmNIC1_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007578 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7579 break;
7580 case GAUDI_EVENT_NIC1_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007581 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7582 qman_base = mmNIC1_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007583 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7584 break;
7585 case GAUDI_EVENT_NIC2_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007586 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7587 qman_base = mmNIC2_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007588 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7589 break;
7590 case GAUDI_EVENT_NIC2_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007591 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7592 qman_base = mmNIC2_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007593 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7594 break;
7595 case GAUDI_EVENT_NIC3_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007596 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7597 qman_base = mmNIC3_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007598 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7599 break;
7600 case GAUDI_EVENT_NIC3_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007601 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7602 qman_base = mmNIC3_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007603 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7604 break;
7605 case GAUDI_EVENT_NIC4_QM0:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007606 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7607 qman_base = mmNIC4_QM0_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007608 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7609 break;
7610 case GAUDI_EVENT_NIC4_QM1:
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007611 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7612 qman_base = mmNIC4_QM1_BASE;
Oded Gabbay3c681572020-11-02 21:10:39 +02007613 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7614 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007615 default:
7616 return;
7617 }
7618
Ohad Sharabi2718e1d2021-05-24 09:59:31 +03007619 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007620}
7621
7622static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7623 bool razwi)
7624{
Ofir Bittonebd8d122020-05-10 13:41:28 +03007625 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007626
7627 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7628 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7629 event_type, desc);
7630
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007631 if (razwi) {
7632 gaudi_print_razwi_info(hdev);
7633 gaudi_print_mmu_error_info(hdev);
7634 }
7635}
7636
Ohad Sharabi5d6a1982021-02-08 14:53:56 +02007637static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7638 struct cpucp_pkt_sync_err *sync_err)
7639{
7640 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7641
7642 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7643 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7644}
7645
Ofir Bitton254fac62021-06-02 11:56:31 +03007646static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7647 struct hl_eq_fw_alive *fw_alive)
7648{
7649 dev_err(hdev->dev,
7650 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7651 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7652 "Minor" : "Critical", fw_alive->process_id,
7653 fw_alive->thread_id, fw_alive->uptime_seconds);
7654}
7655
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007656static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7657{
Ofir Bittonebd8d122020-05-10 13:41:28 +03007658 struct gaudi_device *gaudi = hdev->asic_specific;
7659
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007660 /* Unmask all IRQs since some could have been received
7661 * during the soft reset
7662 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03007663 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007664}
7665
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007666static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7667 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007668{
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007669 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
Oded Gabbayf1a29772021-06-06 11:38:12 +03007670 int rc = 0;
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007671
Ohad Sharabi6a785e32021-05-29 23:26:10 +03007672 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7673 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007674 if (!hbm_ecc_data) {
7675 dev_err(hdev->dev, "No FW ECC data");
7676 return 0;
7677 }
7678
7679 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7680 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7681 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7682 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7683 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7684 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7685 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7686 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7687 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7688 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7689 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7690 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7691 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7692 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7693
7694 dev_err(hdev->dev,
Ohad Sharabib520ca52021-01-27 15:42:53 +02007695 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7696 device, ch, wr_par, rd_par, ca_par, serr, derr);
7697 dev_err(hdev->dev,
7698 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7699 device, ch, hbm_ecc_data->first_addr, type,
7700 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7701 hbm_ecc_data->dec_cnt);
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007702 return 0;
7703 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007704
Ohad Sharabi4cb45082021-05-20 09:09:03 +03007705 if (hdev->asic_prop.fw_security_enabled) {
Ohad Sharabib520ca52021-01-27 15:42:53 +02007706 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7707 return 0;
7708 }
7709
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007710 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7711 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7712 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7713 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7714 if (val) {
Oded Gabbayf1a29772021-06-06 11:38:12 +03007715 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007716 dev_err(hdev->dev,
7717 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7718 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7719 (val >> 2) & 0x1, (val >> 3) & 0x1,
7720 (val >> 4) & 0x1);
7721
7722 val2 = RREG32(base + ch * 0x1000 + 0x060);
7723 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007724 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007725 device, ch * 2,
7726 RREG32(base + ch * 0x1000 + 0x064),
7727 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7728 (val2 & 0xFF0000) >> 16,
7729 (val2 & 0xFF000000) >> 24);
7730 }
7731
7732 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7733 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7734 if (val) {
Oded Gabbayf1a29772021-06-06 11:38:12 +03007735 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007736 dev_err(hdev->dev,
7737 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7738 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7739 (val >> 2) & 0x1, (val >> 3) & 0x1,
7740 (val >> 4) & 0x1);
7741
7742 val2 = RREG32(base + ch * 0x1000 + 0x070);
7743 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007744 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007745 device, ch * 2 + 1,
7746 RREG32(base + ch * 0x1000 + 0x074),
7747 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7748 (val2 & 0xFF0000) >> 16,
7749 (val2 & 0xFF000000) >> 24);
7750 }
7751
7752 /* Clear interrupts */
7753 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7754 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7755 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7756 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7757 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7758 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7759 }
7760
7761 val = RREG32(base + 0x8F30);
7762 val2 = RREG32(base + 0x8F34);
7763 if (val | val2) {
Oded Gabbayf1a29772021-06-06 11:38:12 +03007764 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007765 dev_err(hdev->dev,
7766 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7767 device, val, val2);
7768 }
7769 val = RREG32(base + 0x8F40);
7770 val2 = RREG32(base + 0x8F44);
7771 if (val | val2) {
Oded Gabbayf1a29772021-06-06 11:38:12 +03007772 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007773 dev_err(hdev->dev,
7774 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7775 device, val, val2);
7776 }
7777
Oded Gabbayf1a29772021-06-06 11:38:12 +03007778 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007779}
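/*
 * Illustrative sketch, not part of the driver: the 16-bit interrupt word read
 * above holds two flag bytes which are OR-ed together, so a bit is reported
 * if it is set in either byte.  The bit assignments below repeat the ones
 * used in the error messages; the helper name is a placeholder.
 */
static u32 example_fold_hbm_irq_flags(u32 raw)
{
	u32 folded = (raw & 0xFF) | ((raw >> 8) & 0xFF);

	/* bit 0: WR_PAR, bit 1: RD_PAR, bit 2: CA_PAR, bit 3: SERR, bit 4: DERR */
	return folded;
}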
7780
7781static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7782{
7783 switch (hbm_event_type) {
7784 case GAUDI_EVENT_HBM0_SPI_0:
7785 case GAUDI_EVENT_HBM0_SPI_1:
7786 return 0;
7787 case GAUDI_EVENT_HBM1_SPI_0:
7788 case GAUDI_EVENT_HBM1_SPI_1:
7789 return 1;
7790 case GAUDI_EVENT_HBM2_SPI_0:
7791 case GAUDI_EVENT_HBM2_SPI_1:
7792 return 2;
7793 case GAUDI_EVENT_HBM3_SPI_0:
7794 case GAUDI_EVENT_HBM3_SPI_1:
7795 return 3;
7796 default:
7797 break;
7798 }
7799
7800 /* Should never happen */
7801 return 0;
7802}
7803
7804static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7805 char *interrupt_name)
7806{
7807 struct gaudi_device *gaudi = hdev->asic_specific;
7808 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7809 bool soft_reset_required = false;
7810
7811 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
Oded Gabbay6138bbe2020-09-04 20:18:16 +03007812 * gating, and thus cannot be done in CPU-CP and should be done instead
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007813 * by the driver.
7814 */
7815
7816 mutex_lock(&gaudi->clk_gate_mutex);
7817
7818 hdev->asic_funcs->disable_clock_gating(hdev);
7819
7820 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7821 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7822
7823 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7824 if (tpc_interrupts_cause & BIT(i)) {
7825 dev_err_ratelimited(hdev->dev,
7826 "TPC%d_%s interrupt cause: %s\n",
7827 tpc_id, interrupt_name,
7828 gaudi_tpc_interrupts_cause[i]);
7829 /* If this is a QM error, we need to soft-reset */
7830 if (i == 15)
7831 soft_reset_required = true;
7832 }
7833
7834 /* Clear interrupts */
7835 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7836
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007837 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007838
7839 mutex_unlock(&gaudi->clk_gate_mutex);
7840
7841 return soft_reset_required;
7842}
7843
7844static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7845{
7846 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7847}
7848
7849static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7850{
7851 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7852}
7853
7854static void gaudi_print_clk_change_info(struct hl_device *hdev,
7855 u16 event_type)
7856{
7857 switch (event_type) {
7858 case GAUDI_EVENT_FIX_POWER_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007859 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007860 dev_info_ratelimited(hdev->dev,
7861 "Clock throttling due to power consumption\n");
7862 break;
7863
7864 case GAUDI_EVENT_FIX_POWER_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007865 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007866 dev_info_ratelimited(hdev->dev,
7867 "Power envelope is safe, back to optimal clock\n");
7868 break;
7869
7870 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007871 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007872 dev_info_ratelimited(hdev->dev,
7873 "Clock throttling due to overheating\n");
7874 break;
7875
7876 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007877 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007878 dev_info_ratelimited(hdev->dev,
7879 "Thermal envelope is safe, back to optimal clock\n");
7880 break;
7881
7882 default:
7883 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7884 event_type);
7885 break;
7886 }
7887}
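/*
 * Illustrative sketch, not part of the driver: clk_throttling_reason is a
 * bitmask, so the power and thermal causes are tracked independently and can
 * be active at the same time.  A reader of the field might test it as below;
 * the helper name is a placeholder.
 */
static bool example_is_clk_throttled(struct hl_device *hdev)
{
	return (hdev->clk_throttling_reason &
		(HL_CLK_THROTTLE_POWER | HL_CLK_THROTTLE_THERMAL)) != 0;
}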
7888
7889static void gaudi_handle_eqe(struct hl_device *hdev,
7890 struct hl_eq_entry *eq_entry)
7891{
7892 struct gaudi_device *gaudi = hdev->asic_specific;
7893 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7894 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7895 >> EQ_CTL_EVENT_TYPE_SHIFT);
7896 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03007897 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007898
7899 gaudi->events_stat[event_type]++;
7900 gaudi->events_stat_aggregate[event_type]++;
7901
7902 switch (event_type) {
7903 case GAUDI_EVENT_PCIE_CORE_DERR:
7904 case GAUDI_EVENT_PCIE_IF_DERR:
7905 case GAUDI_EVENT_PCIE_PHY_DERR:
7906 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7907 case GAUDI_EVENT_MME0_ACC_DERR:
7908 case GAUDI_EVENT_MME0_SBAB_DERR:
7909 case GAUDI_EVENT_MME1_ACC_DERR:
7910 case GAUDI_EVENT_MME1_SBAB_DERR:
7911 case GAUDI_EVENT_MME2_ACC_DERR:
7912 case GAUDI_EVENT_MME2_SBAB_DERR:
7913 case GAUDI_EVENT_MME3_ACC_DERR:
7914 case GAUDI_EVENT_MME3_SBAB_DERR:
7915 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7916 fallthrough;
7917 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7918 case GAUDI_EVENT_PSOC_MEM_DERR:
7919 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7920 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7921 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007922 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7923 case GAUDI_EVENT_MMU_DERR:
Ofir Bitton6c31f4942021-06-17 09:52:55 +03007924 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007925 gaudi_print_irq_info(hdev, event_type, true);
7926 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02007927 goto reset_device;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007928
7929 case GAUDI_EVENT_GIC500:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007930 case GAUDI_EVENT_AXI_ECC:
7931 case GAUDI_EVENT_L2_RAM_ECC:
7932 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7933 gaudi_print_irq_info(hdev, event_type, false);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02007934 goto reset_device;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007935
7936 case GAUDI_EVENT_HBM0_SPI_0:
7937 case GAUDI_EVENT_HBM1_SPI_0:
7938 case GAUDI_EVENT_HBM2_SPI_0:
7939 case GAUDI_EVENT_HBM3_SPI_0:
7940 gaudi_print_irq_info(hdev, event_type, false);
7941 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007942 gaudi_hbm_event_to_dev(event_type),
7943 &eq_entry->hbm_ecc_data);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02007944 goto reset_device;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007945
7946 case GAUDI_EVENT_HBM0_SPI_1:
7947 case GAUDI_EVENT_HBM1_SPI_1:
7948 case GAUDI_EVENT_HBM2_SPI_1:
7949 case GAUDI_EVENT_HBM3_SPI_1:
7950 gaudi_print_irq_info(hdev, event_type, false);
7951 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007952 gaudi_hbm_event_to_dev(event_type),
7953 &eq_entry->hbm_ecc_data);
Oded Gabbay230cd892021-01-26 22:58:13 +02007954 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007955 break;
7956
7957 case GAUDI_EVENT_TPC0_DEC:
7958 case GAUDI_EVENT_TPC1_DEC:
7959 case GAUDI_EVENT_TPC2_DEC:
7960 case GAUDI_EVENT_TPC3_DEC:
7961 case GAUDI_EVENT_TPC4_DEC:
7962 case GAUDI_EVENT_TPC5_DEC:
7963 case GAUDI_EVENT_TPC6_DEC:
7964 case GAUDI_EVENT_TPC7_DEC:
7965 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007966 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007967 tpc_dec_event_to_tpc_id(event_type),
7968 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03007969 if (reset_required) {
7970 dev_err(hdev->dev, "hard reset required due to %s\n",
7971 gaudi_irq_map_table[event_type].name);
7972
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02007973 goto reset_device;
Oded Gabbay66446822020-05-18 16:48:01 +03007974 } else {
7975 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03007976 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007977 break;
7978
7979 case GAUDI_EVENT_TPC0_KRN_ERR:
7980 case GAUDI_EVENT_TPC1_KRN_ERR:
7981 case GAUDI_EVENT_TPC2_KRN_ERR:
7982 case GAUDI_EVENT_TPC3_KRN_ERR:
7983 case GAUDI_EVENT_TPC4_KRN_ERR:
7984 case GAUDI_EVENT_TPC5_KRN_ERR:
7985 case GAUDI_EVENT_TPC6_KRN_ERR:
7986 case GAUDI_EVENT_TPC7_KRN_ERR:
7987 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007988 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007989 tpc_krn_event_to_tpc_id(event_type),
7990 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03007991 if (reset_required) {
7992 dev_err(hdev->dev, "hard reset required due to %s\n",
7993 gaudi_irq_map_table[event_type].name);
7994
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02007995 goto reset_device;
Oded Gabbay66446822020-05-18 16:48:01 +03007996 } else {
7997 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03007998 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007999 break;
8000
8001 case GAUDI_EVENT_PCIE_CORE_SERR:
8002 case GAUDI_EVENT_PCIE_IF_SERR:
8003 case GAUDI_EVENT_PCIE_PHY_SERR:
8004 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8005 case GAUDI_EVENT_MME0_ACC_SERR:
8006 case GAUDI_EVENT_MME0_SBAB_SERR:
8007 case GAUDI_EVENT_MME1_ACC_SERR:
8008 case GAUDI_EVENT_MME1_SBAB_SERR:
8009 case GAUDI_EVENT_MME2_ACC_SERR:
8010 case GAUDI_EVENT_MME2_SBAB_SERR:
8011 case GAUDI_EVENT_MME3_ACC_SERR:
8012 case GAUDI_EVENT_MME3_SBAB_SERR:
8013 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8014 case GAUDI_EVENT_CPU_IF_ECC_SERR:
8015 case GAUDI_EVENT_PSOC_MEM_SERR:
8016 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8017 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8018 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8019 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8020 fallthrough;
8021 case GAUDI_EVENT_MMU_SERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03008022 gaudi_print_irq_info(hdev, event_type, true);
8023 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8024 hl_fw_unmask_irq(hdev, event_type);
8025 break;
8026
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008027 case GAUDI_EVENT_PCIE_DEC:
8028 case GAUDI_EVENT_MME0_WBC_RSP:
8029 case GAUDI_EVENT_MME0_SBAB0_RSP:
8030 case GAUDI_EVENT_MME1_WBC_RSP:
8031 case GAUDI_EVENT_MME1_SBAB0_RSP:
8032 case GAUDI_EVENT_MME2_WBC_RSP:
8033 case GAUDI_EVENT_MME2_SBAB0_RSP:
8034 case GAUDI_EVENT_MME3_WBC_RSP:
8035 case GAUDI_EVENT_MME3_SBAB0_RSP:
8036 case GAUDI_EVENT_CPU_AXI_SPLITTER:
8037 case GAUDI_EVENT_PSOC_AXI_DEC:
8038 case GAUDI_EVENT_PSOC_PRSTN_FALL:
8039 case GAUDI_EVENT_MMU_PAGE_FAULT:
8040 case GAUDI_EVENT_MMU_WR_PERM:
8041 case GAUDI_EVENT_RAZWI_OR_ADC:
8042 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8043 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8044 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8045 fallthrough;
Oded Gabbay3c681572020-11-02 21:10:39 +02008046 case GAUDI_EVENT_NIC0_QM0:
8047 case GAUDI_EVENT_NIC0_QM1:
8048 case GAUDI_EVENT_NIC1_QM0:
8049 case GAUDI_EVENT_NIC1_QM1:
8050 case GAUDI_EVENT_NIC2_QM0:
8051 case GAUDI_EVENT_NIC2_QM1:
8052 case GAUDI_EVENT_NIC3_QM0:
8053 case GAUDI_EVENT_NIC3_QM1:
8054 case GAUDI_EVENT_NIC4_QM0:
8055 case GAUDI_EVENT_NIC4_QM1:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008056 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8057 gaudi_print_irq_info(hdev, event_type, true);
8058 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03008059 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008060 break;
8061
8062 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8063 gaudi_print_irq_info(hdev, event_type, true);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008064 goto reset_device;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008065
8066 case GAUDI_EVENT_TPC0_BMON_SPMU:
8067 case GAUDI_EVENT_TPC1_BMON_SPMU:
8068 case GAUDI_EVENT_TPC2_BMON_SPMU:
8069 case GAUDI_EVENT_TPC3_BMON_SPMU:
8070 case GAUDI_EVENT_TPC4_BMON_SPMU:
8071 case GAUDI_EVENT_TPC5_BMON_SPMU:
8072 case GAUDI_EVENT_TPC6_BMON_SPMU:
8073 case GAUDI_EVENT_TPC7_BMON_SPMU:
8074 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8075 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03008076 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008077 break;
8078
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02008079 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8080 gaudi_print_irq_info(hdev, event_type, false);
8081 gaudi_print_sm_sei_info(hdev, event_type,
8082 &eq_entry->sm_sei_data);
8083 hl_fw_unmask_irq(hdev, event_type);
8084 break;
8085
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008086 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8087 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03008088 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008089 break;
8090
8091 case GAUDI_EVENT_PSOC_GPIO_U16_0:
8092 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8093 dev_err(hdev->dev,
8094 "Received high temp H/W interrupt %d (cause %d)\n",
8095 event_type, cause);
8096 break;
8097
Ofir Bittond661d792021-03-09 14:45:04 +02008098 case GAUDI_EVENT_DEV_RESET_REQ:
Ofir Bitton2ea09532021-03-03 13:23:47 +02008099 gaudi_print_irq_info(hdev, event_type, false);
8100 goto reset_device;
8101
Ohad Sharabi5d6a1982021-02-08 14:53:56 +02008102 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8103 gaudi_print_irq_info(hdev, event_type, false);
8104 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008105 goto reset_device;
Ohad Sharabi5d6a1982021-02-08 14:53:56 +02008106
Ofir Bitton254fac62021-06-02 11:56:31 +03008107 case GAUDI_EVENT_FW_ALIVE_S:
8108 gaudi_print_irq_info(hdev, event_type, false);
8109 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8110 goto reset_device;
8111
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008112 default:
8113 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8114 event_type);
8115 break;
8116 }
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008117
8118 return;
8119
8120reset_device:
8121 if (hdev->hard_reset_on_fw_events)
8122 hl_device_reset(hdev, HL_RESET_HARD);
8123 else
8124 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008125}
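/*
 * Illustrative sketch, not part of the driver: the event type is packed into
 * the EQ entry header control word, exactly as decoded at the top of
 * gaudi_handle_eqe().  The helper below repeats that decode on its own, using
 * only the mask/shift pair already referenced above.
 */
static u16 example_eq_entry_to_event_type(struct hl_eq_entry *eq_entry)
{
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);

	return (ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT;
}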
8126
8127static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8128 u32 *size)
8129{
8130 struct gaudi_device *gaudi = hdev->asic_specific;
8131
8132 if (aggregate) {
8133 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8134 return gaudi->events_stat_aggregate;
8135 }
8136
8137 *size = (u32) sizeof(gaudi->events_stat);
8138 return gaudi->events_stat;
8139}
8140
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008141static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008142 u32 flags)
8143{
8144 struct gaudi_device *gaudi = hdev->asic_specific;
8145 u32 status, timeout_usec;
8146 int rc;
8147
8148 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8149 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008150 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008151
8152 if (hdev->pldm)
8153 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8154 else
8155 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8156
8157 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03008158 WREG32(mmSTLB_INV_PS, 3);
8159 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03008160 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008161
8162 rc = hl_poll_timeout(
8163 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03008164 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008165 status,
8166 !status,
8167 1000,
8168 timeout_usec);
8169
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03008170 WREG32(mmSTLB_INV_SET, 0);
8171
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008172 if (rc) {
8173 dev_err_ratelimited(hdev->dev,
8174 "MMU cache invalidation timeout\n");
Ohad Sharabie42a6400f2021-02-17 20:42:48 +02008175 hl_device_reset(hdev, HL_RESET_HARD);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008176 }
8177
8178 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008179}
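/*
 * Illustrative sketch, not part of the driver: hl_poll_timeout() above spins
 * on mmSTLB_INV_PS until it reads zero or the timeout expires.  An open-coded
 * equivalent of that polling pattern is roughly the following simplified
 * helper; it is an assumption about the macro's behaviour, not its real body.
 */
static int example_poll_reg_until_zero(struct hl_device *hdev, u32 reg,
					u32 sleep_us, u32 timeout_us)
{
	u32 elapsed_us = 0;

	while (RREG32(reg)) {
		if (elapsed_us >= timeout_us)
			return -ETIMEDOUT;
		usleep_range(sleep_us, sleep_us + 10);
		elapsed_us += sleep_us;
	}

	return 0;
}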
8180
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03008181static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Alon Mizrahi08c03a12021-04-08 15:30:59 +03008182 bool is_hard, u32 flags,
8183 u32 asid, u64 va, u64 size)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008184{
Alon Mizrahi08c03a12021-04-08 15:30:59 +03008185 /* Treat as invalidate all because there is no range invalidation
8186 * in Gaudi
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008187 */
Alon Mizrahi08c03a12021-04-08 15:30:59 +03008188 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008189}
8190
8191static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8192 u32 asid, u64 phys_addr)
8193{
8194 u32 status, timeout_usec;
8195 int rc;
8196
8197 if (hdev->pldm)
8198 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8199 else
8200 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8201
8202 WREG32(MMU_ASID, asid);
8203 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8204 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8205 WREG32(MMU_BUSY, 0x80000000);
8206
8207 rc = hl_poll_timeout(
8208 hdev,
8209 MMU_BUSY,
8210 status,
8211 !(status & 0x80000000),
8212 1000,
8213 timeout_usec);
8214
8215 if (rc) {
8216 dev_err(hdev->dev,
8217 "Timeout during MMU hop0 config of asid %d\n", asid);
8218 return rc;
8219 }
8220
8221 return 0;
8222}
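/*
 * Illustrative sketch, not part of the driver: the hop0 physical address is
 * programmed in two pieces, matching the two register writes above.  The
 * field widths (bits 43:12 and 49:44) are inferred from the register names
 * and are an assumption; the helper below just reassembles such a split value.
 */
static u64 example_join_hop0_pa(u32 pa43_12, u32 pa49_44)
{
	return ((u64)pa43_12 << MMU_HOP0_PA43_12_SHIFT) |
		((u64)(pa49_44 & 0x3F) << MMU_HOP0_PA49_44_SHIFT);
}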
8223
8224static int gaudi_send_heartbeat(struct hl_device *hdev)
8225{
8226 struct gaudi_device *gaudi = hdev->asic_specific;
8227
8228 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8229 return 0;
8230
8231 return hl_fw_send_heartbeat(hdev);
8232}
8233
Oded Gabbay2f553422020-08-15 16:28:10 +03008234static int gaudi_cpucp_info_get(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008235{
8236 struct gaudi_device *gaudi = hdev->asic_specific;
8237 struct asic_fixed_properties *prop = &hdev->asic_prop;
8238 int rc;
8239
8240 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8241 return 0;
8242
Ohad Sharabie67a60402021-05-02 15:45:21 +03008243 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8244 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8245 mmCPU_BOOT_ERR1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008246 if (rc)
8247 return rc;
8248
Oded Gabbay2f553422020-08-15 16:28:10 +03008249 if (!strlen(prop->cpucp_info.card_name))
8250 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008251 CARD_NAME_MAX_LEN);
8252
Oded Gabbay2f553422020-08-15 16:28:10 +03008253 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
Oded Gabbay58361aa2020-08-08 23:34:47 +03008254
Koby Elbazcd5def82021-02-23 21:31:27 +02008255 set_default_power_values(hdev);
Oded Gabbay58361aa2020-08-08 23:34:47 +03008256
8257 hdev->max_power = prop->max_power_default;
8258
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008259 return 0;
8260}
8261
Ohad Sharabicf303392021-01-17 16:01:56 +02008262static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8263 u8 mask_len, struct seq_file *s)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008264{
8265 struct gaudi_device *gaudi = hdev->asic_specific;
8266 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8267 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
Oded Gabbay3c681572020-11-02 21:10:39 +02008268 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
Ohad Sharabicf303392021-01-17 16:01:56 +02008269 unsigned long *mask = (unsigned long *)mask_arr;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008270 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8271 bool is_idle = true, is_eng_idle, is_slave;
8272 u64 offset;
Oded Gabbay3c681572020-11-02 21:10:39 +02008273 int i, dma_id, port;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008274
8275 mutex_lock(&gaudi->clk_gate_mutex);
8276
8277 hdev->asic_funcs->disable_clock_gating(hdev);
8278
8279 if (s)
8280 seq_puts(s,
8281 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8282 "--- ------- ------------ ---------- -------------\n");
8283
8284 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8285 dma_id = gaudi_dma_assignment[i];
8286 offset = dma_id * DMA_QMAN_OFFSET;
8287
8288 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8289 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8290 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8291 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8292 IS_DMA_IDLE(dma_core_sts0);
8293 is_idle &= is_eng_idle;
8294
Ohad Sharabicf303392021-01-17 16:01:56 +02008295 if (mask && !is_eng_idle)
8296 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008297 if (s)
8298 seq_printf(s, fmt, dma_id,
8299 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8300 qm_cgm_sts, dma_core_sts0);
8301 }
8302
8303 if (s)
8304 seq_puts(s,
8305 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8306 "--- ------- ------------ ---------- ----------\n");
8307
8308 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8309 offset = i * TPC_QMAN_OFFSET;
8310 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8311 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8312 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8313 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8314 IS_TPC_IDLE(tpc_cfg_sts);
8315 is_idle &= is_eng_idle;
8316
Ohad Sharabicf303392021-01-17 16:01:56 +02008317 if (mask && !is_eng_idle)
8318 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008319 if (s)
8320 seq_printf(s, fmt, i,
8321 is_eng_idle ? "Y" : "N",
8322 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8323 }
8324
8325 if (s)
8326 seq_puts(s,
8327 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8328 "--- ------- ------------ ---------- -----------\n");
8329
8330 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8331 offset = i * MME_QMAN_OFFSET;
8332 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8333 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8334
8335 /* MME 1 & 3 are slaves, no need to check their QMANs */
8336 is_slave = i % 2;
8337 if (!is_slave) {
8338 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8339 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8340 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8341 }
8342
8343 is_idle &= is_eng_idle;
8344
Ohad Sharabicf303392021-01-17 16:01:56 +02008345 if (mask && !is_eng_idle)
8346 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008347 if (s) {
8348 if (!is_slave)
8349 seq_printf(s, fmt, i,
8350 is_eng_idle ? "Y" : "N",
8351 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8352 else
8353 seq_printf(s, mme_slave_fmt, i,
8354 is_eng_idle ? "Y" : "N", "-",
8355 "-", mme_arch_sts);
8356 }
8357 }
8358
8359 if (s)
Oded Gabbay3c681572020-11-02 21:10:39 +02008360 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8361 "--- ------- ------------ ----------\n");
8362
8363 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8364 offset = i * NIC_MACRO_QMAN_OFFSET;
8365 port = 2 * i;
Oded Gabbay90810212021-05-25 21:35:13 +03008366 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
Oded Gabbay3c681572020-11-02 21:10:39 +02008367 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8368 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8369 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8370 is_idle &= is_eng_idle;
8371
Ohad Sharabicf303392021-01-17 16:01:56 +02008372 if (mask && !is_eng_idle)
8373 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
Oded Gabbay3c681572020-11-02 21:10:39 +02008374 if (s)
8375 seq_printf(s, nic_fmt, port,
8376 is_eng_idle ? "Y" : "N",
8377 qm_glbl_sts0, qm_cgm_sts);
8378 }
8379
8380 port = 2 * i + 1;
Oded Gabbay90810212021-05-25 21:35:13 +03008381 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
Oded Gabbay3c681572020-11-02 21:10:39 +02008382 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8383 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8384 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8385 is_idle &= is_eng_idle;
8386
Ohad Sharabicf303392021-01-17 16:01:56 +02008387 if (mask && !is_eng_idle)
8388 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
Oded Gabbay3c681572020-11-02 21:10:39 +02008389 if (s)
8390 seq_printf(s, nic_fmt, port,
8391 is_eng_idle ? "Y" : "N",
8392 qm_glbl_sts0, qm_cgm_sts);
8393 }
8394 }
8395
8396 if (s)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008397 seq_puts(s, "\n");
8398
Oded Gabbaye38bfd32020-07-03 20:46:12 +03008399 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008400
8401 mutex_unlock(&gaudi->clk_gate_mutex);
8402
8403 return is_idle;
8404}
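/*
 * Illustrative sketch, not part of the driver: callers that pass mask_arr get
 * one bit per busy engine, indexed by the GAUDI_ENGINE_ID_* values set above.
 * Checking a specific engine afterwards could look like this; the helper name
 * is a placeholder.
 */
static bool example_engine_is_busy(u64 *mask_arr, u32 engine_id)
{
	return test_bit(engine_id, (unsigned long *)mask_arr);
}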
8405
8406static void gaudi_hw_queues_lock(struct hl_device *hdev)
8407 __acquires(&gaudi->hw_queues_lock)
8408{
8409 struct gaudi_device *gaudi = hdev->asic_specific;
8410
8411 spin_lock(&gaudi->hw_queues_lock);
8412}
8413
8414static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8415 __releases(&gaudi->hw_queues_lock)
8416{
8417 struct gaudi_device *gaudi = hdev->asic_specific;
8418
8419 spin_unlock(&gaudi->hw_queues_lock);
8420}
8421
8422static u32 gaudi_get_pci_id(struct hl_device *hdev)
8423{
8424 return hdev->pdev->device;
8425}
8426
8427static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8428 size_t max_size)
8429{
8430 struct gaudi_device *gaudi = hdev->asic_specific;
8431
8432 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8433 return 0;
8434
8435 return hl_fw_get_eeprom_data(hdev, data, max_size);
8436}
8437
8438/*
8439 * this function should be used only during initialization and/or after reset,
8440 * when there are no active users.
8441 */
8442static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8443 u32 tpc_id)
8444{
8445 struct gaudi_device *gaudi = hdev->asic_specific;
8446 u64 kernel_timeout;
8447 u32 status, offset;
8448 int rc;
8449
8450 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8451
8452 if (hdev->pldm)
8453 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8454 else
8455 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8456
8457 mutex_lock(&gaudi->clk_gate_mutex);
8458
8459 hdev->asic_funcs->disable_clock_gating(hdev);
8460
8461 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8462 lower_32_bits(tpc_kernel));
8463 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8464 upper_32_bits(tpc_kernel));
8465
8466 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8467 lower_32_bits(tpc_kernel));
8468 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8469 upper_32_bits(tpc_kernel));
8470 /* set a valid LUT pointer, content is of no significance */
8471 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8472 lower_32_bits(tpc_kernel));
8473 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8474 upper_32_bits(tpc_kernel));
8475
8476 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8477 lower_32_bits(CFG_BASE +
8478 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8479
8480 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8481 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8482 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8483 /* wait a bit for the engine to start executing */
8484 usleep_range(1000, 1500);
8485
8486 /* wait until engine has finished executing */
8487 rc = hl_poll_timeout(
8488 hdev,
8489 mmTPC0_CFG_STATUS + offset,
8490 status,
8491 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8492 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8493 1000,
8494 kernel_timeout);
8495
8496 if (rc) {
8497 dev_err(hdev->dev,
8498 "Timeout while waiting for TPC%d icache prefetch\n",
8499 tpc_id);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03008500 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008501 mutex_unlock(&gaudi->clk_gate_mutex);
8502 return -EIO;
8503 }
8504
8505 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8506 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8507
8508 /* wait a bit for the engine to start executing */
8509 usleep_range(1000, 1500);
8510
8511 /* wait until engine has finished executing */
8512 rc = hl_poll_timeout(
8513 hdev,
8514 mmTPC0_CFG_STATUS + offset,
8515 status,
8516 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8517 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8518 1000,
8519 kernel_timeout);
8520
Oded Gabbay31ac1f12020-08-12 11:28:13 +03008521 if (rc) {
8522 dev_err(hdev->dev,
8523 "Timeout while waiting for TPC%d vector pipe\n",
8524 tpc_id);
8525 hdev->asic_funcs->set_clock_gating(hdev);
8526 mutex_unlock(&gaudi->clk_gate_mutex);
8527 return -EIO;
8528 }
8529
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008530 rc = hl_poll_timeout(
8531 hdev,
8532 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8533 status,
8534 (status == 0),
8535 1000,
8536 kernel_timeout);
8537
Oded Gabbaye38bfd32020-07-03 20:46:12 +03008538 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008539 mutex_unlock(&gaudi->clk_gate_mutex);
8540
8541 if (rc) {
8542 dev_err(hdev->dev,
8543 "Timeout while waiting for TPC%d kernel to execute\n",
8544 tpc_id);
8545 return -EIO;
8546 }
8547
8548 return 0;
8549}
8550
Ofir Bitton5de406c2020-09-10 10:56:26 +03008551static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8552 struct hl_ctx *ctx)
8553{
8554 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bitton5de406c2020-09-10 10:56:26 +03008555 int min_alloc_order, rc, collective_cb_size;
8556
8557 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8558 return 0;
8559
8560 hdev->internal_cb_pool_virt_addr =
8561 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8562 HOST_SPACE_INTERNAL_CB_SZ,
8563 &hdev->internal_cb_pool_dma_addr,
8564 GFP_KERNEL | __GFP_ZERO);
8565
8566 if (!hdev->internal_cb_pool_virt_addr)
8567 return -ENOMEM;
8568
8569 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8570 sizeof(struct packet_fence);
8571 min_alloc_order = ilog2(collective_cb_size);
8572
8573 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8574 if (!hdev->internal_cb_pool) {
8575 dev_err(hdev->dev,
8576 "Failed to create internal CB pool\n");
8577 rc = -ENOMEM;
8578 goto free_internal_cb_pool;
8579 }
8580
8581 rc = gen_pool_add(hdev->internal_cb_pool,
8582 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8583 HOST_SPACE_INTERNAL_CB_SZ, -1);
8584 if (rc) {
8585 dev_err(hdev->dev,
8586 "Failed to add memory to internal CB pool\n");
8587 rc = -EFAULT;
8588 goto destroy_internal_cb_pool;
8589 }
8590
Ofir Bittonbe91b912020-10-22 15:04:10 +03008591 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
Ofir Bitton412c41f2020-11-04 15:18:55 +02008592 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8593 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
Ofir Bittonbe91b912020-10-22 15:04:10 +03008594
Koby Elbaz1f7ef4b2021-06-10 09:14:43 +03008595 if (!hdev->internal_cb_va_base) {
8596 rc = -ENOMEM;
Ofir Bittonbe91b912020-10-22 15:04:10 +03008597 goto destroy_internal_cb_pool;
Koby Elbaz1f7ef4b2021-06-10 09:14:43 +03008598 }
Ofir Bitton5de406c2020-09-10 10:56:26 +03008599
8600 mutex_lock(&ctx->mmu_lock);
Ofir Bitton5c054872020-10-22 15:13:10 +03008601 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8602 hdev->internal_cb_pool_dma_addr,
8603 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008604
8605 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008606 mutex_unlock(&ctx->mmu_lock);
8607
Ofir Bitton5c054872020-10-22 15:13:10 +03008608 if (rc)
8609 goto unreserve_internal_cb_pool;
8610
Ofir Bitton5de406c2020-09-10 10:56:26 +03008611 return 0;
8612
Ofir Bitton5c054872020-10-22 15:13:10 +03008613unreserve_internal_cb_pool:
Ofir Bittonbe91b912020-10-22 15:04:10 +03008614 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8615 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008616destroy_internal_cb_pool:
8617 gen_pool_destroy(hdev->internal_cb_pool);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008618free_internal_cb_pool:
8619 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8620 HOST_SPACE_INTERNAL_CB_SZ,
8621 hdev->internal_cb_pool_virt_addr,
8622 hdev->internal_cb_pool_dma_addr);
8623
8624 return rc;
8625}
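/*
 * Illustrative sketch, not part of the driver: once the pool above is set up,
 * collective CBs are expected to be carved out of it with the genalloc API.
 * A minimal allocate/use/free cycle would look like this; the helper name and
 * error handling are placeholders.
 */
static int example_use_internal_cb_pool(struct hl_device *hdev, u32 cb_size)
{
	unsigned long cb_addr;

	cb_addr = gen_pool_alloc(hdev->internal_cb_pool, cb_size);
	if (!cb_addr)
		return -ENOMEM;

	/* ... build and submit the CB that lives at cb_addr ... */

	gen_pool_free(hdev->internal_cb_pool, cb_addr, cb_size);

	return 0;
}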
8626
8627static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8628 struct hl_ctx *ctx)
8629{
8630 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bitton5de406c2020-09-10 10:56:26 +03008631
8632 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8633 return;
8634
8635 mutex_lock(&ctx->mmu_lock);
Ofir Bitton5c054872020-10-22 15:13:10 +03008636 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8637 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bittonbe91b912020-10-22 15:04:10 +03008638 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8639 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008640 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008641 mutex_unlock(&ctx->mmu_lock);
8642
8643 gen_pool_destroy(hdev->internal_cb_pool);
8644
8645 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8646 HOST_SPACE_INTERNAL_CB_SZ,
8647 hdev->internal_cb_pool_virt_addr,
8648 hdev->internal_cb_pool_dma_addr);
8649}
8650
kernel test robotbb34bf72020-07-29 08:03:13 +08008651static int gaudi_ctx_init(struct hl_ctx *ctx)
Ofir Bittona04b7cd2020-07-13 13:36:55 +03008652{
Ofir Bitton8e39e752020-11-12 11:03:32 +02008653 if (ctx->asid == HL_KERNEL_ASID_ID)
8654 return 0;
8655
Ofir Bitton20b75252020-09-30 15:51:10 +03008656 gaudi_mmu_prepare(ctx->hdev, ctx->asid);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008657 return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8658}
Ofir Bitton20b75252020-09-30 15:51:10 +03008659
kernel test robot293744d2020-11-19 12:25:43 +08008660static void gaudi_ctx_fini(struct hl_ctx *ctx)
Ofir Bitton5de406c2020-09-10 10:56:26 +03008661{
Ofir Bitton8e39e752020-11-12 11:03:32 +02008662 if (ctx->asid == HL_KERNEL_ASID_ID)
Ofir Bitton5de406c2020-09-10 10:56:26 +03008663 return;
8664
8665 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
Ofir Bittona04b7cd2020-07-13 13:36:55 +03008666}
8667
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008668static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8669{
8670 return gaudi_cq_assignment[cq_idx];
8671}
8672
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008673static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8674{
8675 return sizeof(struct packet_msg_short) +
8676 sizeof(struct packet_msg_prot) * 2;
8677}
8678
8679static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8680{
8681 return sizeof(struct packet_msg_short) * 4 +
8682 sizeof(struct packet_fence) +
8683 sizeof(struct packet_msg_prot) * 2;
8684}
8685
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008686static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
Alon Mizrahi72ab9ca52020-12-02 19:55:30 +02008687 u32 size, bool eb)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008688{
8689 struct hl_cb *cb = (struct hl_cb *) data;
8690 struct packet_msg_short *pkt;
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008691 u32 value, ctl, pkt_size = sizeof(*pkt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008692
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008693 pkt = cb->kernel_address + size;
8694 memset(pkt, 0, pkt_size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008695
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008696 /* Inc by 1, Mode ADD */
8697 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8698 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008699
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008700 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8701 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8702 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008703 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8704 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8705 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8706 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008707
8708 pkt->value = cpu_to_le32(value);
8709 pkt->ctl = cpu_to_le32(ctl);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008710
8711 return size + pkt_size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008712}
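/*
 * Illustrative sketch, not part of the driver: FIELD_PREP(mask, val) from
 * <linux/bitfield.h> shifts val into the bit positions described by mask, so
 * the ctl words above are simply an OR of independently prepared fields.  The
 * helper below builds a stand-alone example value and is not a real packet.
 */
static u32 example_build_ctl_word(void)
{
	u32 ctl;

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);	/* engine barrier */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);	/* register barrier */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);	/* message barrier */

	return ctl;
}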
8713
8714static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8715 u16 addr)
8716{
8717 u32 ctl, pkt_size = sizeof(*pkt);
8718
8719 memset(pkt, 0, pkt_size);
8720
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008721 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8722 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008723 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8724 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8725 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8726 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008727
8728 pkt->value = cpu_to_le32(value);
8729 pkt->ctl = cpu_to_le32(ctl);
8730
8731 return pkt_size;
8732}
8733
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008734static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8735 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8736 u16 sob_val, u16 mon_id)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008737{
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008738 u64 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008739 u32 ctl, value, pkt_size = sizeof(*pkt);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008740 u16 msg_addr_offset;
8741 u8 mask;
8742
8743 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8744 dev_err(hdev->dev,
8745 "sob_base %u (mask %#x) is not valid\n",
8746 sob_base, sob_mask);
8747 return 0;
8748 }
8749
8750 /*
8751 * monitor_base should be the content of the base0 address registers,
8752 * so it will be added to the msg short offsets
8753 */
8754 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8755
8756 msg_addr_offset =
8757 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8758 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008759
8760 memset(pkt, 0, pkt_size);
8761
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008762 /* Monitor config packet: bind the monitor to a sync object */
8763 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008764 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8765 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8766				0); /* GREATER OR EQUAL */
8767 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008768
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008769 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008770 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8771 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008772 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8773 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8774 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8775 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008776
8777 pkt->value = cpu_to_le32(value);
8778 pkt->ctl = cpu_to_le32(ctl);
8779
8780 return pkt_size;
8781}
8782
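/*
 * FENCE packet on which the queue will stall: it waits for CP fence
 * counter 2 to reach a target value of 1 and then decrements it by 1.
 * The value is provided by the monitor payload configured in the wait
 * CB, which targets the queue's CP_FENCE2_RDATA register.
 */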
8783static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8784{
8785 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8786
8787 memset(pkt, 0, pkt_size);
8788
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008789 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8790 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8791 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008792
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008793 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8794 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8795 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8796 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008797
8798 pkt->cfg = cpu_to_le32(cfg);
8799 pkt->ctl = cpu_to_le32(ctl);
8800
8801 return pkt_size;
8802}
8803
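/*
 * Translate a queue ID to the address of that queue's CP_FENCE2_RDATA
 * register, which is used as the monitor payload target of a wait CB.
 * Only the queues that may carry a wait CS (DMA 0/1/5, TPC7 and the NIC
 * queues) are handled; any other queue ID is rejected with -EINVAL.
 */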
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008804static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008805{
Ofir Bitton5de406c2020-09-10 10:56:26 +03008806 u32 offset, nic_index;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008807
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008808 switch (queue_id) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008809 case GAUDI_QUEUE_ID_DMA_0_0:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008810 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008811 break;
8812 case GAUDI_QUEUE_ID_DMA_0_1:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008813 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008814 break;
8815 case GAUDI_QUEUE_ID_DMA_0_2:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008816 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008817 break;
8818 case GAUDI_QUEUE_ID_DMA_0_3:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008819 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008820 break;
8821 case GAUDI_QUEUE_ID_DMA_1_0:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008822 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008823 break;
8824 case GAUDI_QUEUE_ID_DMA_1_1:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008825 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008826 break;
8827 case GAUDI_QUEUE_ID_DMA_1_2:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008828 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008829 break;
8830 case GAUDI_QUEUE_ID_DMA_1_3:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008831 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008832 break;
8833 case GAUDI_QUEUE_ID_DMA_5_0:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008834 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008835 break;
8836 case GAUDI_QUEUE_ID_DMA_5_1:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008837 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008838 break;
8839 case GAUDI_QUEUE_ID_DMA_5_2:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008840 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008841 break;
8842 case GAUDI_QUEUE_ID_DMA_5_3:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008843 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008844 break;
Ofir Bitton5de406c2020-09-10 10:56:26 +03008845 case GAUDI_QUEUE_ID_TPC_7_0:
8846 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8847 break;
8848 case GAUDI_QUEUE_ID_TPC_7_1:
8849 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8850 break;
8851 case GAUDI_QUEUE_ID_TPC_7_2:
8852 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8853 break;
8854 case GAUDI_QUEUE_ID_TPC_7_3:
8855 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8856 break;
8857 case GAUDI_QUEUE_ID_NIC_0_0:
8858 case GAUDI_QUEUE_ID_NIC_1_0:
8859 case GAUDI_QUEUE_ID_NIC_2_0:
8860 case GAUDI_QUEUE_ID_NIC_3_0:
8861 case GAUDI_QUEUE_ID_NIC_4_0:
8862 case GAUDI_QUEUE_ID_NIC_5_0:
8863 case GAUDI_QUEUE_ID_NIC_6_0:
8864 case GAUDI_QUEUE_ID_NIC_7_0:
8865 case GAUDI_QUEUE_ID_NIC_8_0:
8866 case GAUDI_QUEUE_ID_NIC_9_0:
8867 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8868 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8869 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8870 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8871 break;
8872 case GAUDI_QUEUE_ID_NIC_0_1:
8873 case GAUDI_QUEUE_ID_NIC_1_1:
8874 case GAUDI_QUEUE_ID_NIC_2_1:
8875 case GAUDI_QUEUE_ID_NIC_3_1:
8876 case GAUDI_QUEUE_ID_NIC_4_1:
8877 case GAUDI_QUEUE_ID_NIC_5_1:
8878 case GAUDI_QUEUE_ID_NIC_6_1:
8879 case GAUDI_QUEUE_ID_NIC_7_1:
8880 case GAUDI_QUEUE_ID_NIC_8_1:
8881 case GAUDI_QUEUE_ID_NIC_9_1:
8882 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8883 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8884 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8885 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8886 break;
8887 case GAUDI_QUEUE_ID_NIC_0_2:
8888 case GAUDI_QUEUE_ID_NIC_1_2:
8889 case GAUDI_QUEUE_ID_NIC_2_2:
8890 case GAUDI_QUEUE_ID_NIC_3_2:
8891 case GAUDI_QUEUE_ID_NIC_4_2:
8892 case GAUDI_QUEUE_ID_NIC_5_2:
8893 case GAUDI_QUEUE_ID_NIC_6_2:
8894 case GAUDI_QUEUE_ID_NIC_7_2:
8895 case GAUDI_QUEUE_ID_NIC_8_2:
8896 case GAUDI_QUEUE_ID_NIC_9_2:
8897 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8898 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8899 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8900 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8901 break;
8902 case GAUDI_QUEUE_ID_NIC_0_3:
8903 case GAUDI_QUEUE_ID_NIC_1_3:
8904 case GAUDI_QUEUE_ID_NIC_2_3:
8905 case GAUDI_QUEUE_ID_NIC_3_3:
8906 case GAUDI_QUEUE_ID_NIC_4_3:
8907 case GAUDI_QUEUE_ID_NIC_5_3:
8908 case GAUDI_QUEUE_ID_NIC_6_3:
8909 case GAUDI_QUEUE_ID_NIC_7_3:
8910 case GAUDI_QUEUE_ID_NIC_8_3:
8911 case GAUDI_QUEUE_ID_NIC_9_3:
8912 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8913 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8914 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8915 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8916 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008917 default:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008918 return -EINVAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008919 }
8920
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008921 *addr = CFG_BASE + offset;
8922
8923 return 0;
8924}
8925
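/*
 * Emit the three monitor configuration packets for 'mon_id': the low and
 * high 32 bits of the payload address (the fence register to write when
 * the monitor triggers) and the payload data, which is always 1 here.
 */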
8926static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8927{
8928 u64 monitor_base;
8929 u32 size = 0;
8930 u16 msg_addr_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008931
8932 /*
8933 * monitor_base should be the content of the base0 address registers,
8934 * so it will be added to the msg short offsets
8935 */
8936 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8937
8938 /* First monitor config packet: low address of the sync */
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008939 msg_addr_offset =
8940 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8941 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008942
8943 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8944 msg_addr_offset);
8945
8946 /* Second monitor config packet: high address of the sync */
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008947 msg_addr_offset =
8948 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8949 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008950
8951 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8952 msg_addr_offset);
8953
8954 /*
8955 * Third monitor config packet: the payload, i.e. what to write when the
8956 * sync triggers
8957 */
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008958 msg_addr_offset =
8959 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8960 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008961
8962 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8963
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008964 return size;
8965}
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008966
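/*
 * Build the wait CB at offset prop->size inside the CB: resolve the fence
 * register of the target queue, configure and arm the monitor so it
 * writes 1 to that register once the sync objects reach the requested
 * value, and end with the FENCE packet the queue will block on.
 */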
Oded Gabbay3c681572020-11-02 21:10:39 +02008967static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8968 struct hl_gen_wait_properties *prop)
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008969{
8970 struct hl_cb *cb = (struct hl_cb *) prop->data;
8971 void *buf = cb->kernel_address;
8972 u64 fence_addr = 0;
8973 u32 size = prop->size;
8974
8975 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8976		dev_crit(hdev->dev, "wrong queue id %u for wait packet\n",
8977 prop->q_idx);
8978 return 0;
8979 }
8980
8981 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8982 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8983 prop->sob_mask, prop->sob_val, prop->mon_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008984 size += gaudi_add_fence_pkt(buf + size);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008985
8986 return size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008987}
8988
8989static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8990{
8991 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
Ofir Bitton423815b2021-01-05 09:04:07 +02008992 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008993
8994 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8995 hw_sob->sob_id);
8996
Ofir Bitton423815b2021-01-05 09:04:07 +02008997 rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
8998 CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8999 hw_sob->sob_id * 4, 1, 0);
9000 if (rc)
9001 dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009002
9003 kref_init(&hw_sob->kref);
9004}
9005
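/*
 * A magic value in one of the non-reset scratchpad registers, presumably
 * left there by the boot firmware, marks a POWER9 host that supports the
 * full 64-bit DMA mask; otherwise the default 48-bit mask is used.
 */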
9006static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9007{
9008 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9009 HL_POWER9_HOST_MAGIC) {
9010 hdev->power9_64bit_dma_enable = 1;
9011 hdev->dma_mask = 64;
9012 } else {
9013 hdev->power9_64bit_dma_enable = 0;
9014 hdev->dma_mask = 48;
9015 }
9016}
9017
9018static u64 gaudi_get_device_time(struct hl_device *hdev)
9019{
9020 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9021
9022 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9023}
9024
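/*
 * Mapping HW blocks to user-space is not supported on Gaudi, so both the
 * block ID lookup and the block mmap handler are stubs returning -EPERM.
 */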
Ofir Bittond00697f2021-01-05 12:55:06 +02009025static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
Oded Gabbay6df50d22021-02-05 16:04:34 +02009026 u32 *block_size, u32 *block_id)
Ofir Bittond00697f2021-01-05 12:55:06 +02009027{
9028 return -EPERM;
9029}
9030
9031static int gaudi_block_mmap(struct hl_device *hdev,
9032 struct vm_area_struct *vma,
9033 u32 block_id, u32 block_size)
9034{
9035 return -EPERM;
9036}
9037
Oded Gabbay28bcf1f2021-02-01 21:23:43 +02009038static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9039{
Koby Elbaze591a492021-05-12 18:05:46 +03009040 struct cpu_dyn_regs *dyn_regs =
9041 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
Koby Elbaz81217362021-05-03 23:03:15 +03009042 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9043 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
Ofir Bitton5bc691d2021-05-25 22:09:13 +03009044 le32_to_cpu(dyn_regs->gic_host_ints_irq);
Koby Elbaz81217362021-05-03 23:03:15 +03009045
Ofir Bitton7d5ba002021-06-07 15:22:56 +03009046 WREG32(irq_handler_offset,
9047 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
Oded Gabbay28bcf1f2021-02-01 21:23:43 +02009048}
9049
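/*
 * Translate the PLL index used by the common/uapi code (HL_GAUDI_*_PLL)
 * to the PLL index used on the firmware interface.
 */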
Bharat Jauhari285c0fa2021-03-25 18:15:40 +02009050static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9051{
9052 switch (pll_idx) {
9053 case HL_GAUDI_CPU_PLL: return CPU_PLL;
9054 case HL_GAUDI_PCI_PLL: return PCI_PLL;
9055 case HL_GAUDI_NIC_PLL: return NIC_PLL;
9056 case HL_GAUDI_DMA_PLL: return DMA_PLL;
9057 case HL_GAUDI_MESH_PLL: return MESH_PLL;
9058 case HL_GAUDI_MME_PLL: return MME_PLL;
9059 case HL_GAUDI_TPC_PLL: return TPC_PLL;
9060 case HL_GAUDI_IF_PLL: return IF_PLL;
9061 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9062 case HL_GAUDI_HBM_PLL: return HBM_PLL;
9063 default: return -EINVAL;
9064 }
9065}
9066
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009067static int gaudi_add_sync_to_engine_map_entry(
9068 struct hl_sync_to_engine_map *map, u32 reg_value,
9069 enum hl_sync_engine_type engine_type, u32 engine_id)
9070{
9071 struct hl_sync_to_engine_map_entry *entry;
9072
9073	/* The reg value holds a partial address of the sync object and is
9074	 * used as its unique identifier, so the CFG base bits must be
9075	 * subtracted from the value before it is stored.
9076 */
9077 if (reg_value == 0 || reg_value == 0xffffffff)
9078 return 0;
9079 reg_value -= (u32)CFG_BASE;
9080
9081 /* create a new hash entry */
9082 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9083 if (!entry)
9084 return -ENOMEM;
9085 entry->engine_type = engine_type;
9086 entry->engine_id = engine_id;
9087 entry->sync_id = reg_value;
9088 hash_add(map->tb, &entry->node, reg_value);
9089
9090 return 0;
9091}
9092
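/*
 * Build the sync-object-to-engine map used by the state dump: read the
 * SO address each TPC, MME and DMA engine is configured to signal and
 * hash it back to the engine type and ID. TPC and MME registers can only
 * be read while clock gating is disabled, hence the lock/disable/enable
 * sequence around each such read.
 */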
Yuri Nudelman938b7932021-06-06 10:28:51 +03009093static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9094 struct hl_sync_to_engine_map *map)
9095{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009096 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9097 struct gaudi_device *gaudi = hdev->asic_specific;
9098 int i, j, rc;
9099 u32 reg_value;
9100
9101 /* Iterate over TPC engines */
9102 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9103		/* TPC registers must be accessed with clock gating disabled */
9104 mutex_lock(&gaudi->clk_gate_mutex);
9105 hdev->asic_funcs->disable_clock_gating(hdev);
9106
9107 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9108 sds->props[SP_NEXT_TPC] * i);
9109
9110 /* We can reenable clock_gating */
9111 hdev->asic_funcs->set_clock_gating(hdev);
9112 mutex_unlock(&gaudi->clk_gate_mutex);
9113
9114 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9115 ENGINE_TPC, i);
9116 if (rc)
9117 goto free_sync_to_engine_map;
9118 }
9119
9120 /* Iterate over MME engines */
9121 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9122 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9123			/* MME registers must be accessed with clock gating
9124 * disabled
9125 */
9126 mutex_lock(&gaudi->clk_gate_mutex);
9127 hdev->asic_funcs->disable_clock_gating(hdev);
9128
9129 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9130 sds->props[SP_NEXT_MME] * i +
9131 j * sizeof(u32));
9132
9133 /* We can reenable clock_gating */
9134 hdev->asic_funcs->set_clock_gating(hdev);
9135 mutex_unlock(&gaudi->clk_gate_mutex);
9136
9137 rc = gaudi_add_sync_to_engine_map_entry(
9138 map, reg_value, ENGINE_MME,
9139 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9140 if (rc)
9141 goto free_sync_to_engine_map;
9142 }
9143 }
9144
9145 /* Iterate over DMA engines */
9146 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9147 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9148 sds->props[SP_DMA_QUEUES_OFFSET] * i);
9149 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9150 ENGINE_DMA, i);
9151 if (rc)
9152 goto free_sync_to_engine_map;
9153 }
9154
Yuri Nudelman938b7932021-06-06 10:28:51 +03009155 return 0;
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009156
9157free_sync_to_engine_map:
9158 hl_state_dump_free_sync_to_engine_map(map);
9159
9160 return rc;
Yuri Nudelman938b7932021-06-06 10:28:51 +03009161}
9162
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009163static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9164{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009165 return FIELD_GET(
9166 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9167 mon->status);
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009168}
9169
9170static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9171 struct hl_device *hdev,
9172 struct hl_mon_state_dump *mon)
9173{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009174 const char *name;
9175 char scratch_buf1[BIN_REG_STRING_SIZE],
9176 scratch_buf2[BIN_REG_STRING_SIZE];
9177
9178 name = hl_state_dump_get_monitor_name(hdev, mon);
9179 if (!name)
9180 name = "";
9181
9182 return hl_snprintf_resize(
9183 buf, size, offset,
9184 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
9185 mon->id, name,
9186 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9187 mon->arm_data),
9188 hl_format_as_binary(
9189 scratch_buf1, sizeof(scratch_buf1),
9190 FIELD_GET(
9191 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9192 mon->arm_data)),
9193 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9194 mon->arm_data),
9195 mon->wr_data,
9196 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9197 hl_format_as_binary(
9198 scratch_buf2, sizeof(scratch_buf2),
9199 FIELD_GET(
9200 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9201 mon->status)));
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009202}
9203
9204
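/*
 * Dump the fence state of a single engine's QMAN: for every stream whose
 * CP currently has a fence in progress, print the fence ID, the addresses
 * of its CNT and RDATA registers and the current counter value, appending
 * the text to the caller's resizable buffer.
 */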
9205static int gaudi_print_fences_single_engine(
9206 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9207 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9208 size_t *size, size_t *offset)
9209{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009210 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9211 int rc = -ENOMEM, i;
9212 u32 *statuses, *fences;
9213
9214 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9215 sizeof(*statuses), GFP_KERNEL);
9216 if (!statuses)
9217 goto out;
9218
9219 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9220 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9221 sizeof(*fences), GFP_KERNEL);
9222 if (!fences)
9223 goto free_status;
9224
9225 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9226 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9227
9228 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9229 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9230 fences[i] = RREG32(base_offset + i * sizeof(u32));
9231
9232 /* The actual print */
9233 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9234 u32 fence_id;
9235 u64 fence_cnt, fence_rdata;
9236 const char *engine_name;
9237
9238 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9239 statuses[i]))
9240 continue;
9241
9242 fence_id =
9243 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9244 fence_cnt = base_offset + CFG_BASE +
9245 sizeof(u32) *
9246 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9247 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9248 sds->props[SP_FENCE0_RDATA_OFFSET];
9249 engine_name = hl_sync_engine_to_string(engine_type);
9250
9251 rc = hl_snprintf_resize(
9252 buf, size, offset,
9253 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9254 engine_name, engine_id,
9255 i, fence_id,
9256 fence_cnt, engine_name, engine_id, fence_id, i,
9257 fence_rdata, engine_name, engine_id, fence_id, i,
9258 fences[fence_id],
9259 statuses[i]);
9260 if (rc)
9261 goto free_fences;
9262 }
9263
9264 rc = 0;
9265
9266free_fences:
9267 kfree(fences);
9268free_status:
9269 kfree(statuses);
9270out:
9271 return rc;
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009272}
9273
Yuri Nudelman938b7932021-06-06 10:28:51 +03009274
9275static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009276 .monitor_valid = gaudi_monitor_valid,
9277 .print_single_monitor = gaudi_print_single_monitor,
Yuri Nudelman938b7932021-06-06 10:28:51 +03009278 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
Yuri Nudelmanfd2010b2021-06-09 14:04:26 +03009279 .print_fences_single_engine = gaudi_print_fences_single_engine,
Yuri Nudelman938b7932021-06-06 10:28:51 +03009280};
9281
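/*
 * Register everything the common state-dump code needs for Gaudi: the
 * SOB-ID and monitor-ID to name hash tables, the ASIC specific property
 * table, the sync manager names and the callbacks defined above.
 */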
9282static void gaudi_state_dump_init(struct hl_device *hdev)
9283{
Yuri Nudelman77977ac2021-06-06 10:30:41 +03009284 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9285 int i;
9286
9287 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9288 hash_add(sds->so_id_to_str_tb,
9289 &gaudi_so_id_to_str[i].node,
9290 gaudi_so_id_to_str[i].id);
9291
9292 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9293 hash_add(sds->monitor_id_to_str_tb,
9294 &gaudi_monitor_id_to_str[i].node,
9295 gaudi_monitor_id_to_str[i].id);
9296
9297 sds->props = gaudi_state_dump_specs_props;
9298
9299 sds->sync_namager_names = gaudi_sync_manager_names;
9300
9301 sds->funcs = gaudi_state_dump_funcs;
Yuri Nudelman938b7932021-06-06 10:28:51 +03009302}
9303
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009304static const struct hl_asic_funcs gaudi_funcs = {
9305 .early_init = gaudi_early_init,
9306 .early_fini = gaudi_early_fini,
9307 .late_init = gaudi_late_init,
9308 .late_fini = gaudi_late_fini,
9309 .sw_init = gaudi_sw_init,
9310 .sw_fini = gaudi_sw_fini,
9311 .hw_init = gaudi_hw_init,
9312 .hw_fini = gaudi_hw_fini,
9313 .halt_engines = gaudi_halt_engines,
9314 .suspend = gaudi_suspend,
9315 .resume = gaudi_resume,
9316 .cb_mmap = gaudi_cb_mmap,
9317 .ring_doorbell = gaudi_ring_doorbell,
9318 .pqe_write = gaudi_pqe_write,
9319 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9320 .asic_dma_free_coherent = gaudi_dma_free_coherent,
farah kassabri03df1362020-05-06 11:17:38 +03009321 .scrub_device_mem = gaudi_scrub_device_mem,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009322 .get_int_queue_base = gaudi_get_int_queue_base,
9323 .test_queues = gaudi_test_queues,
9324 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9325 .asic_dma_pool_free = gaudi_dma_pool_free,
9326 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9327 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9328 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9329 .cs_parser = gaudi_cs_parser,
9330 .asic_dma_map_sg = gaudi_dma_map_sg,
9331 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9332 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9333 .update_eq_ci = gaudi_update_eq_ci,
9334 .context_switch = gaudi_context_switch,
9335 .restore_phase_topology = gaudi_restore_phase_topology,
9336 .debugfs_read32 = gaudi_debugfs_read32,
9337 .debugfs_write32 = gaudi_debugfs_write32,
9338 .debugfs_read64 = gaudi_debugfs_read64,
9339 .debugfs_write64 = gaudi_debugfs_write64,
Oded Gabbay639781d2021-04-02 01:43:18 +03009340 .debugfs_read_dma = gaudi_debugfs_read_dma,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03009341 .add_device_attr = gaudi_add_device_attr,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009342 .handle_eqe = gaudi_handle_eqe,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03009343 .set_pll_profile = gaudi_set_pll_profile,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009344 .get_events_stat = gaudi_get_events_stat,
9345 .read_pte = gaudi_read_pte,
9346 .write_pte = gaudi_write_pte,
9347 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9348 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9349 .send_heartbeat = gaudi_send_heartbeat,
Oded Gabbaye38bfd32020-07-03 20:46:12 +03009350 .set_clock_gating = gaudi_set_clock_gating,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009351 .disable_clock_gating = gaudi_disable_clock_gating,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03009352 .debug_coresight = gaudi_debug_coresight,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009353 .is_device_idle = gaudi_is_device_idle,
9354 .soft_reset_late_init = gaudi_soft_reset_late_init,
9355 .hw_queues_lock = gaudi_hw_queues_lock,
9356 .hw_queues_unlock = gaudi_hw_queues_unlock,
9357 .get_pci_id = gaudi_get_pci_id,
9358 .get_eeprom_data = gaudi_get_eeprom_data,
9359 .send_cpu_message = gaudi_send_cpu_message,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009360 .pci_bars_map = gaudi_pci_bars_map,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009361 .init_iatu = gaudi_init_iatu,
9362 .rreg = hl_rreg,
9363 .wreg = hl_wreg,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03009364 .halt_coresight = gaudi_halt_coresight,
Ofir Bittona04b7cd2020-07-13 13:36:55 +03009365 .ctx_init = gaudi_ctx_init,
Ofir Bitton5de406c2020-09-10 10:56:26 +03009366 .ctx_fini = gaudi_ctx_fini,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03009367 .get_clk_rate = gaudi_get_clk_rate,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009368 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009369 .load_firmware_to_device = gaudi_load_firmware_to_device,
9370 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009371 .get_signal_cb_size = gaudi_get_signal_cb_size,
9372 .get_wait_cb_size = gaudi_get_wait_cb_size,
9373 .gen_signal_cb = gaudi_gen_signal_cb,
9374 .gen_wait_cb = gaudi_gen_wait_cb,
9375 .reset_sob = gaudi_reset_sob,
Ofir Bitton5fe1c172020-09-10 10:10:55 +03009376 .reset_sob_group = gaudi_reset_sob_group,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009377 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
Ofir Bitton5fe1c172020-09-10 10:10:55 +03009378 .get_device_time = gaudi_get_device_time,
9379 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
Moti Haimovskib19dc672020-11-18 20:15:29 +02009380 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
farah kassabri89473a12021-01-12 17:24:00 +02009381 .scramble_addr = hl_mmu_scramble_addr,
9382 .descramble_addr = hl_mmu_descramble_addr,
Ofir Bittond00697f2021-01-05 12:55:06 +02009383 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9384 .get_hw_block_id = gaudi_get_hw_block_id,
Oded Gabbay28bcf1f2021-02-01 21:23:43 +02009385 .hw_block_mmap = gaudi_block_mmap,
Bharat Jauhari285c0fa2021-03-25 18:15:40 +02009386 .enable_events_from_fw = gaudi_enable_events_from_fw,
Ohad Sharabia22f0ec2021-04-11 23:06:46 +03009387 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9388 .init_firmware_loader = gaudi_init_firmware_loader,
Yuri Nudelman938b7932021-06-06 10:28:51 +03009389 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9390 .state_dump_init = gaudi_state_dump_init
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009391};
9392
9393/**
9394 * gaudi_set_asic_funcs - set GAUDI function pointers
9395 *
Lee Jonesf7d227c2020-07-01 09:58:42 +01009396 * @hdev: pointer to hl_device structure
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03009397 *
9398 */
9399void gaudi_set_asic_funcs(struct hl_device *hdev)
9400{
9401 hdev->asic_funcs = &gaudi_funcs;
9402}