// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

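/* Returns true if @id is one of the packet opcodes the driver knows how to
 * parse (the same set that gaudi_packet_sizes above covers).
 */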
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->max_power_default = MAX_POWER_DEFAULT_PCI;

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	/* disable fw security for now, set it in a later stage */
	prop->fw_security_disabled = true;
	prop->fw_security_status_valid = false;
	prop->hard_reset_done_by_fw = false;

	return 0;
}

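/* Map the SRAM, CFG and HBM PCI BARs and derive the register base (rmmio)
 * from the CFG BAR. Only the HBM BAR is mapped write-combined.
 */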
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

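/* Move inbound PCI region 2 (the HBM BAR) to point at @addr and return the
 * previous base address, or U64_MAX on failure.
 */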
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

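/* Program the PCIe iATU: three inbound regions (SRAM+CFG, SPI flash, HBM)
 * and one outbound region covering the host physical address space.
 */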
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = gaudi_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_disabled) {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	} else {
		rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc;

	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc) {
		dev_err(hdev->dev, "Firmware file %s is not found!\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

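/* Assign the SOBs of the currently active SOB group of @stream to the NIC
 * slave queues and to the reduction engines (DMA5 and TPC7).
 */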
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id + i) * 4, 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

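/* Set up the collective-wait infrastructure: reset and assign the SOB groups,
 * map them to the slave queues of each stream, and build the two master
 * monitor SOB masks (NICs 0-7, then NICs 8-9 plus the reduction engine).
 */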
static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	prop->mstr_sob_mask[0] = 0;
	master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[0] |= BIT(i);

	prop->mstr_sob_mask[1] = 0;
	master_monitor_sobs =
		NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs; i++) {
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[1] |= BIT(i);
	}

	/* Set collective engine bit */
	prop->mstr_sob_mask[1] |= BIT(i);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size);
}

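/* Prepare a collective-wait CS: copy the signal SOB into the wait CS,
 * initialize the master/slave jobs, and handle SOB group refcounting and
 * wraparound for the stream.
 */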
static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);
	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;
}

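/* Create a single collective-wait job (master or slave) on @queue_id and
 * attach it, with a kernel-allocated CB of the proper size, to @cs.
 */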
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

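/* Build the full set of collective-wait jobs: one master job on the wait
 * queue plus a slave job per enabled NIC and one for the reduction engine
 * (DMA5 or TPC7), skipping NICs whose ports are disabled.
 */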
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

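/* Allocate the CPU-accessible DMA region, retrying until an allocation whose
 * PCI address MSBs (bits 49:39) are constant across the whole range is found,
 * as required by the single HW extension register described below.
 */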
1411static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1412{
1413 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1414 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1415 int i, j, rc = 0;
1416
1417 /*
1418 * The device CPU works with 40-bits addresses, while bit 39 must be set
1419 * to '1' when accessing the host.
1420 * Bits 49:39 of the full host address are saved for a later
1421 * configuration of the HW to perform extension to 50 bits.
1422 * Because there is a single HW register that holds the extension bits,
1423 * these bits must be identical in all allocated range.
1424 */
1425
1426 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1427 virt_addr_arr[i] =
1428 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1429 HL_CPU_ACCESSIBLE_MEM_SIZE,
1430 &dma_addr_arr[i],
1431 GFP_KERNEL | __GFP_ZERO);
1432 if (!virt_addr_arr[i]) {
1433 rc = -ENOMEM;
1434 goto free_dma_mem_arr;
1435 }
1436
1437 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1438 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1439 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1440 break;
1441 }
1442
1443 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1444 dev_err(hdev->dev,
1445 "MSB of CPU accessible DMA memory are not identical in all range\n");
1446 rc = -EFAULT;
1447 goto free_dma_mem_arr;
1448 }
1449
1450 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1451 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1452 hdev->cpu_pci_msb_addr =
1453 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1454
Ofir Bittonc692dec2020-10-04 17:34:37 +03001455 if (hdev->asic_prop.fw_security_disabled)
1456 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001457
1458free_dma_mem_arr:
1459 for (j = 0 ; j < i ; j++)
1460 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1461 HL_CPU_ACCESSIBLE_MEM_SIZE,
1462 virt_addr_arr[j],
1463 dma_addr_arr[j]);
1464
1465 return rc;
1466}
1467
1468static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1469{
1470 struct gaudi_device *gaudi = hdev->asic_specific;
1471 struct gaudi_internal_qman_info *q;
1472 u32 i;
1473
1474 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1475 q = &gaudi->internal_qmans[i];
1476 if (!q->pq_kernel_addr)
1477 continue;
1478 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1479 q->pq_kernel_addr,
1480 q->pq_dma_addr);
1481 }
1482}
1483
1484static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1485{
1486 struct gaudi_device *gaudi = hdev->asic_specific;
1487 struct gaudi_internal_qman_info *q;
1488 int rc, i;
1489
1490 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1491 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1492 continue;
1493
1494 q = &gaudi->internal_qmans[i];
1495
1496 switch (i) {
Ofir Bitton0940cab2020-08-31 08:52:56 +03001497 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001498 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1499 break;
1500 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1501 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1502 break;
1503 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1504 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1505 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02001506 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1507 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1508 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001509 default:
1510 dev_err(hdev->dev, "Bad internal queue index %d", i);
1511 rc = -EINVAL;
1512 goto free_internal_qmans_pq_mem;
1513 }
1514
1515 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1516 hdev, q->pq_size,
1517 &q->pq_dma_addr,
1518 GFP_KERNEL | __GFP_ZERO);
1519 if (!q->pq_kernel_addr) {
1520 rc = -ENOMEM;
1521 goto free_internal_qmans_pq_mem;
1522 }
1523 }
1524
1525 return 0;
1526
1527free_internal_qmans_pq_mem:
1528 gaudi_free_internal_qmans_pq_mem(hdev);
1529 return rc;
1530}
1531
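/*
 * Software-only initialization: build the event ID map, create the DMA
 * pools, allocate the CPU accessible memory region and the internal
 * QMAN PQ buffers.
 */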
1532static int gaudi_sw_init(struct hl_device *hdev)
1533{
1534 struct gaudi_device *gaudi;
1535 u32 i, event_id = 0;
1536 int rc;
1537
1538 /* Allocate device structure */
1539 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1540 if (!gaudi)
1541 return -ENOMEM;
1542
1543 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1544 if (gaudi_irq_map_table[i].valid) {
1545 if (event_id == GAUDI_EVENT_SIZE) {
1546 dev_err(hdev->dev,
1547 "Event array exceeds the limit of %u events\n",
1548 GAUDI_EVENT_SIZE);
1549 rc = -EINVAL;
1550 goto free_gaudi_device;
1551 }
1552
1553 gaudi->events[event_id++] =
1554 gaudi_irq_map_table[i].fc_id;
1555 }
1556 }
1557
1558 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1559
1560 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1561
1562 hdev->asic_specific = gaudi;
1563
1564 /* Create DMA pool for small allocations */
1565 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1566 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1567 if (!hdev->dma_pool) {
1568 dev_err(hdev->dev, "failed to create DMA pool\n");
1569 rc = -ENOMEM;
1570 goto free_gaudi_device;
1571 }
1572
1573 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1574 if (rc)
1575 goto free_dma_pool;
1576
1577 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1578 if (!hdev->cpu_accessible_dma_pool) {
1579 dev_err(hdev->dev,
1580 "Failed to create CPU accessible DMA pool\n");
1581 rc = -ENOMEM;
1582 goto free_cpu_dma_mem;
1583 }
1584
1585 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1586 (uintptr_t) hdev->cpu_accessible_dma_mem,
1587 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1588 if (rc) {
1589 dev_err(hdev->dev,
1590 "Failed to add memory to CPU accessible DMA pool\n");
1591 rc = -EFAULT;
1592 goto free_cpu_accessible_dma_pool;
1593 }
1594
1595 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1596 if (rc)
1597 goto free_cpu_accessible_dma_pool;
1598
1599 spin_lock_init(&gaudi->hw_queues_lock);
1600 mutex_init(&gaudi->clk_gate_mutex);
1601
1602 hdev->supports_sync_stream = true;
1603 hdev->supports_coresight = true;
1604
1605 return 0;
1606
1607free_cpu_accessible_dma_pool:
1608 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1609free_cpu_dma_mem:
1610 if (hdev->asic_prop.fw_security_disabled)
1611 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1612 hdev->cpu_pci_msb_addr);
1613 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1614 HL_CPU_ACCESSIBLE_MEM_SIZE,
1615 hdev->cpu_accessible_dma_mem,
1616 hdev->cpu_accessible_dma_address);
1617free_dma_pool:
1618 dma_pool_destroy(hdev->dma_pool);
1619free_gaudi_device:
1620 kfree(gaudi);
1621 return rc;
1622}
1623
1624static int gaudi_sw_fini(struct hl_device *hdev)
1625{
1626 struct gaudi_device *gaudi = hdev->asic_specific;
1627
1628 gaudi_free_internal_qmans_pq_mem(hdev);
1629
1630 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1631
1632 if (hdev->asic_prop.fw_security_disabled)
1633 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1634 hdev->cpu_pci_msb_addr);
1635
1636 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1637 HL_CPU_ACCESSIBLE_MEM_SIZE,
1638 hdev->cpu_accessible_dma_mem,
1639 hdev->cpu_accessible_dma_address);
1640
1641 dma_pool_destroy(hdev->dma_pool);
1642
1643 mutex_destroy(&gaudi->clk_gate_mutex);
1644
1645 kfree(gaudi);
1646
1647 return 0;
1648}
1649
1650static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1651{
1652 struct hl_device *hdev = arg;
1653 int i;
1654
1655 if (hdev->disabled)
1656 return IRQ_HANDLED;
1657
1658 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1659 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1660
1661 hl_irq_handler_eq(irq, &hdev->event_queue);
1662
1663 return IRQ_HANDLED;
1664}
1665
1666/*
1667 * For backward compatibility, new MSI interrupts should be set after the
1668 * existing CPU and NIC interrupts.
1669 */
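/*
 * Vector layout as implemented below: completion queues use vectors
 * 0..GAUDI_EVENT_QUEUE_MSI_IDX-1, the CPU event queue uses vector
 * GAUDI_EVENT_QUEUE_MSI_IDX, and any newer interrupt index is shifted
 * past the NIC engines and the CPU EQ entry, i.e.
 * msi_vec = nr + NIC_NUMBER_OF_ENGINES + 1.
 */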
1670static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1671 bool cpu_eq)
1672{
1673 int msi_vec;
1674
1675 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1676 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1677 GAUDI_EVENT_QUEUE_MSI_IDX);
1678
1679 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1680 (nr + NIC_NUMBER_OF_ENGINES + 1);
1681
1682 return pci_irq_vector(hdev->pdev, msi_vec);
1683}
1684
1685static int gaudi_enable_msi_single(struct hl_device *hdev)
1686{
1687 int rc, irq;
1688
1689 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1690
1691 irq = gaudi_pci_irq_vector(hdev, 0, false);
1692 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1693 "gaudi single msi", hdev);
1694 if (rc)
1695 dev_err(hdev->dev,
1696 "Failed to request single MSI IRQ\n");
1697
1698 return rc;
1699}
1700
1701static int gaudi_enable_msi_multi(struct hl_device *hdev)
1702{
1703 int cq_cnt = hdev->asic_prop.completion_queues_count;
1704 int rc, i, irq_cnt_init, irq;
1705
1706 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1707 irq = gaudi_pci_irq_vector(hdev, i, false);
1708 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1709 &hdev->completion_queue[i]);
1710 if (rc) {
1711 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1712 goto free_irqs;
1713 }
1714 }
1715
1716 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1717 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1718 &hdev->event_queue);
1719 if (rc) {
1720 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1721 goto free_irqs;
1722 }
1723
1724 return 0;
1725
1726free_irqs:
1727 for (i = 0 ; i < irq_cnt_init ; i++)
1728 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1729 &hdev->completion_queue[i]);
1730 return rc;
1731}
1732
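/*
 * Enable MSI: request the full set of vectors and fall back to a single
 * shared vector if the PCI core grants fewer than NUMBER_OF_INTERRUPTS.
 */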
1733static int gaudi_enable_msi(struct hl_device *hdev)
1734{
1735 struct gaudi_device *gaudi = hdev->asic_specific;
1736 int rc;
1737
1738 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1739 return 0;
1740
1741 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1742 PCI_IRQ_MSI);
1743 if (rc < 0) {
1744 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1745 return rc;
1746 }
1747
1748 if (rc < NUMBER_OF_INTERRUPTS) {
1749 gaudi->multi_msi_mode = false;
1750 rc = gaudi_enable_msi_single(hdev);
1751 } else {
1752 gaudi->multi_msi_mode = true;
1753 rc = gaudi_enable_msi_multi(hdev);
1754 }
1755
1756 if (rc)
1757 goto free_pci_irq_vectors;
1758
1759 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1760
1761 return 0;
1762
1763free_pci_irq_vectors:
1764 pci_free_irq_vectors(hdev->pdev);
1765 return rc;
1766}
1767
1768static void gaudi_sync_irqs(struct hl_device *hdev)
1769{
1770 struct gaudi_device *gaudi = hdev->asic_specific;
1771 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1772
1773 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1774 return;
1775
1776 /* Wait for all pending IRQs to be finished */
1777 if (gaudi->multi_msi_mode) {
1778 for (i = 0 ; i < cq_cnt ; i++)
1779 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1780
1781 synchronize_irq(gaudi_pci_irq_vector(hdev,
1782 GAUDI_EVENT_QUEUE_MSI_IDX,
1783 true));
1784 } else {
1785 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1786 }
1787}
1788
1789static void gaudi_disable_msi(struct hl_device *hdev)
1790{
1791 struct gaudi_device *gaudi = hdev->asic_specific;
1792 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1793
1794 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1795 return;
1796
1797 gaudi_sync_irqs(hdev);
1798
1799 if (gaudi->multi_msi_mode) {
1800 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1801 true);
1802 free_irq(irq, &hdev->event_queue);
1803
1804 for (i = 0 ; i < cq_cnt ; i++) {
1805 irq = gaudi_pci_irq_vector(hdev, i, false);
1806 free_irq(irq, &hdev->completion_queue[i]);
1807 }
1808 } else {
1809 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1810 }
1811
1812 pci_free_irq_vectors(hdev->pdev);
1813
1814 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1815}
1816
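/*
 * The SRAM scrambler is configured by the driver only when firmware
 * security is disabled, the firmware has not already enabled it and the
 * relevant module parameter requests it; otherwise this is a no-op.
 */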
1817static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1818{
1819 struct gaudi_device *gaudi = hdev->asic_specific;
1820
1821 if (!hdev->asic_prop.fw_security_disabled)
1822 return;
1823
1824 if (hdev->asic_prop.fw_security_status_valid &&
1825 (hdev->asic_prop.fw_app_security_map &
1826 CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
1827 return;
1828
1829 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1830 return;
1831
1832 if (!hdev->sram_scrambler_enable)
1833 return;
1834
1835 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1836 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1837 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1838 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1839 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1840 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1841 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1842 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1843 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1844 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1845 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1846 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1847 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1848 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1849 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1850 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1851
1852 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1853 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1854 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1855 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1856 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1857 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1858 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1859 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1860 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1861 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1862 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1863 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1864 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1865 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1866 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1867 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1868
1869 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1870 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1871 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1872 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1873 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1874 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1875 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1876 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1877 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1878 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1879 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1880 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1881 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1882 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1883 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1884 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1885
1886 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1887}
1888
1889static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1890{
1891 struct gaudi_device *gaudi = hdev->asic_specific;
1892
1893 if (!hdev->asic_prop.fw_security_disabled)
1894 return;
1895
1896 if (hdev->asic_prop.fw_security_status_valid &&
1897 (hdev->asic_prop.fw_boot_cpu_security_map &
1898 CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
1899 return;
1900
1901 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1902 return;
1903
1904 if (!hdev->dram_scrambler_enable)
1905 return;
1906
1907 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1908 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1909 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1910 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1911 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1912 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1913 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1914 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1915 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1916 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1917 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1918 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1919 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1920 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1921 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1922 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1923
1924 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1925 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1926 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1927 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1928 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1929 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1930 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1931 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1932 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1933 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1934 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1935 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1936 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1937 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1938 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1939 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1940
1941 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1942 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1943 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1944 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1945 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1946 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1947 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1948 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1949 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1950 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1951 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1952 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1953 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1954 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1955 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1956 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1957
1958 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1959}
1960
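/*
 * Program the end-to-end credits (HBM/PCI read and write sizes) of the
 * SIF/NIF routers and the DMA_IF down channels, then enable E2E on all
 * of them. Skipped when the firmware owns this configuration.
 */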
1961static void gaudi_init_e2e(struct hl_device *hdev)
1962{
1963 if (!hdev->asic_prop.fw_security_disabled)
1964 return;
1965
1966 if (hdev->asic_prop.fw_security_status_valid &&
1967 (hdev->asic_prop.fw_boot_cpu_security_map &
1968 CPU_BOOT_DEV_STS0_E2E_CRED_EN))
1969 return;
1970
1971 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1972 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1973 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1974 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1975
1976 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1977 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1978 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1979 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1980
1981 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1982 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1983 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1984 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1985
1986 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1987 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1988 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1989 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1990
1991 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1992 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1993 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1994 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1995
1996 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1997 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1998 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1999 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2000
2001 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2002 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2003 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2004 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2005
2006 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2007 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2008 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2009 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2010
2011 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2012 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2013 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2014 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2015
2016 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2017 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2018 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2019 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2020
2021 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2022 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2023 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2024 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2025
2026 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2027 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2028 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2029 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2030
2031 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2032 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2033 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2034 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2035
2036 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2037 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2038 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2039 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2040
2041 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2042 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2043 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2044 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2045
2046 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2047 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2048 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2049 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2050
2051 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2052 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2053 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2054 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2055
2056 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2057 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2058 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2059 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2060
2061 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2062 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2063 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2064 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2065
2066 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2067 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2068 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2069 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2070
2071 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2072 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2073 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2074 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2075
2076 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2077 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2078 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2079 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2080
2081 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2082 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2083 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2084 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2085
2086 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2087 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2088 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2089 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2090
2091 if (!hdev->dram_scrambler_enable) {
2092 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2093 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2094 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2095 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2096
2097 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2098 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2099 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2100 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2101
2102 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2103 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2104 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2105 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2106
2107 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2108 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2109 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2110 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2111
2112 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2113 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2114 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2115 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2116
2117 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2118 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2119 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2120 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2121
2122 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2123 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2124 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2125 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2126
2127 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2128 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2129 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2130 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2131
2132 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2133 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2134 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2135 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2136
2137 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2138 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2139 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2140 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2141
2142 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2143 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2144 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2145 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2146
2147 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2148 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2149 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2150 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2151
2152 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2153 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2154 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2155 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2156
2157 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2158 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2159 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2160 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2161
2162 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2163 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2164 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2165 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2166
2167 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2168 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2169 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2170 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2171
2172 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2173 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2174 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2175 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2176
2177 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2178 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2179 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2180 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2181
2182 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2183 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2184 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2185 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2186
2187 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2188 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2189 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2190 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2191
2192 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2193 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2194 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2195 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2196
2197 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2198 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2199 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2200 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2201
2202 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2203 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2204 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2205 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2206
2207 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2208 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2209 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2210 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2211 }
2212
2213 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2214 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2215 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2216 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2217
2218 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2219 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2220 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2221 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2222
2223 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2224 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2225 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2226 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2227
2228 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2229 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2230 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2231 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2232
2233 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2234 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2235 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2236 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2237
2238 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2239 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2240 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2241 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2242
2243 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2244 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2245 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2246 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2247
2248 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2249 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2250 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2251 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2252
2253 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2254 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2255 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2256 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2257
2258 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2259 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2260 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2261 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2262
2263 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2264 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2265 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2266 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2267
2268 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2269 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2270 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2271 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2272
2273 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2274 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2275 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2276 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2277
2278 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2279 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2280 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2281 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2282
2283 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2284 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2285 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2286 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2287
2288 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2289 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2290 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2291 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2292
2293 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2294 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2295 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2296 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2297
2298 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2299 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2300 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2301 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2302
2303 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2304 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2305 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2306 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2307
2308 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2309 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2310 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2311 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2312
2313 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2314 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2315 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2316 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2317
2318 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2319 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2320 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2321 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2322
2323 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2324 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2325 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2326 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2327
2328 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2329 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2330 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2331 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2332}
2333
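/*
 * Program the HBM read/write credit counters of the four DMA_IF
 * instances and enable read/write credits, unless the firmware has
 * already done so.
 */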
2334static void gaudi_init_hbm_cred(struct hl_device *hdev)
2335{
2336 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2337
2338 if (!hdev->asic_prop.fw_security_disabled)
2339 return;
2340
2341 if (hdev->asic_prop.fw_security_status_valid &&
2342 (hdev->asic_prop.fw_boot_cpu_security_map &
2343 CPU_BOOT_DEV_STS0_HBM_CRED_EN))
2344 return;
2345
2346 hbm0_wr = 0x33333333;
2347 hbm0_rd = 0x77777777;
2348 hbm1_wr = 0x55555555;
2349 hbm1_rd = 0xDDDDDDDD;
2350
2351 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2352 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2353 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2354 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2355
2356 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2357 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2358 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2359 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2360
2361 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2362 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2363 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2364 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2365
2366 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2367 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2368 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2369 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2370
2371 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2372 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2373 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2374 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2375 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2376 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2377 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2378 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2379 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2380 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2381 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2382 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2383
2384 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2385 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2386 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2387 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2388 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2389 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2390 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2391 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2392 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2393 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2394 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2395 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2396}
2397
2398static void gaudi_init_golden_registers(struct hl_device *hdev)
2399{
2400 u32 tpc_offset;
2401 int tpc_id, i;
2402
2403 gaudi_init_e2e(hdev);
2404 gaudi_init_hbm_cred(hdev);
2405
2406 hdev->asic_funcs->disable_clock_gating(hdev);
2407
2408 for (tpc_id = 0, tpc_offset = 0;
2409 tpc_id < TPC_NUMBER_OF_ENGINES;
2410 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2411 /* Mask all arithmetic interrupts from TPC */
2412 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2413 /* Set 16 cache lines */
2414 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2415 ICACHE_FETCH_LINE_NUM, 2);
2416 }
2417
2418 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2419 for (i = 0 ; i < 128 ; i += 8)
2420 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2421
2422 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2423 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2424 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2425 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2426}
2427
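/*
 * Per-stream setup of a PCI DMA QMAN: PQ base and size, LDMA offsets and
 * the CP_MSG_BASE monitor/sync-object addresses. The RAZWI error IRQ
 * routing and the arbiter watchdog are configured once per QMAN, on
 * stream 0.
 */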
2428static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2429 int qman_id, dma_addr_t qman_pq_addr)
2430{
2431 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2432 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2433 u32 q_off, dma_qm_offset;
2434 u32 dma_qm_err_cfg;
2435
2436 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2437
2438 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2439 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2440 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2441 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2442 so_base_en_lo = lower_32_bits(CFG_BASE +
2443 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2444 so_base_en_hi = upper_32_bits(CFG_BASE +
2445 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2446 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2447 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2448 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2449 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2450 so_base_ws_lo = lower_32_bits(CFG_BASE +
2451 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2452 so_base_ws_hi = upper_32_bits(CFG_BASE +
2453 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2454
2455 q_off = dma_qm_offset + qman_id * 4;
2456
2457 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2458 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2459
2460 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2461 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2462 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2463
2464 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2465 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2466 QMAN_LDMA_SRC_OFFSET);
2467 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2468 QMAN_LDMA_DST_OFFSET);
2469
2470 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2471 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2472 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2473 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2474 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2475 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2476 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2477 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2478
2479 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2480
2481 /* The following configuration is needed only once per QMAN */
2482 if (qman_id == 0) {
2483 /* Configure RAZWI IRQ */
2484 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2485 if (hdev->stop_on_err) {
2486 dma_qm_err_cfg |=
2487 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2488 }
2489
2490 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2491 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2492 lower_32_bits(CFG_BASE +
2493 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2494 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2495 upper_32_bits(CFG_BASE +
2496 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2497 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2498 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2499 dma_id);
2500
2501 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2502 QM_ARB_ERR_MSG_EN_MASK);
2503
2504 /* Increase ARB WDT to support streams architecture */
2505 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2506 GAUDI_ARB_WDT_TIMEOUT);
2507
2508 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2509 QMAN_EXTERNAL_MAKE_TRUSTED);
2510
2511 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2512 }
2513}
2514
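/*
 * DMA core (engine) setup: maximum outstanding reads, error reporting
 * towards the GIC, MMU bypass for the secured channel and, finally,
 * enabling the channel itself.
 */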
2515static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2516{
2517 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2518 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2519
2520 /* Set to maximum possible according to physical size */
2521 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2522 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2523
2524 /* WA for H/W bug H3-2116 */
2525 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2526
2527 /* STOP_ON bit implies no completion to operation in case of RAZWI */
2528 if (hdev->stop_on_err)
2529 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2530
2531 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2532 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2533 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2534 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2535 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2536 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2537 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2538 WREG32(mmDMA0_CORE_PROT + dma_offset,
2539 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2540 /* If the channel is secured, it should be in MMU bypass mode */
2541 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2542 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2543 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2544}
2545
2546static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2547 u32 enable_mask)
2548{
2549 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2550
2551 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2552}
2553
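/*
 * Initialize all PCI DMA queues: map each stream to its completion queue
 * and MSI vector (accounting for the CPU queue and the NIC IRQs), then
 * configure the QMAN and the DMA core and enable the channel.
 */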
2554static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2555{
2556 struct gaudi_device *gaudi = hdev->asic_specific;
2557 struct hl_hw_queue *q;
2558 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2559
2560 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2561 return;
2562
2563 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2564 dma_id = gaudi_dma_assignment[i];
2565 /*
2566 * For queues after the CPU Q, we need to add 1 to get the correct
2567 * queue index. In addition, we need to add the CPU EQ and NIC IRQ
2568 * counts in order to get the correct MSI register.
2569 */
2570 if (dma_id > 1) {
2571 cpu_skip = 1;
2572 nic_skip = NIC_NUMBER_OF_ENGINES;
2573 } else {
2574 cpu_skip = 0;
2575 nic_skip = 0;
2576 }
2577
2578 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2579 q_idx = 4 * dma_id + j + cpu_skip;
2580 q = &hdev->kernel_queues[q_idx];
2581 q->cq_id = cq_id++;
2582 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2583 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2584 q->bus_address);
2585 }
2586
2587 gaudi_init_dma_core(hdev, dma_id);
2588
2589 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2590 }
2591
2592 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2593}
2594
2595static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2596 int qman_id, u64 qman_base_addr)
2597{
2598 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2599 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2600 u32 q_off, dma_qm_offset;
2601 u32 dma_qm_err_cfg;
2602
2603 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2604
2605 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2606 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2607 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2608 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2609 so_base_en_lo = lower_32_bits(CFG_BASE +
2610 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2611 so_base_en_hi = upper_32_bits(CFG_BASE +
2612 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2613 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2614 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2615 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2616 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2617 so_base_ws_lo = lower_32_bits(CFG_BASE +
2618 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2619 so_base_ws_hi = upper_32_bits(CFG_BASE +
2620 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2621
2622 q_off = dma_qm_offset + qman_id * 4;
2623
2624 if (qman_id < 4) {
2625 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2626 lower_32_bits(qman_base_addr));
2627 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2628 upper_32_bits(qman_base_addr));
2629
2630 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2631 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2632 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2633
2634 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2635 QMAN_CPDMA_SIZE_OFFSET);
2636 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2637 QMAN_CPDMA_SRC_OFFSET);
2638 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2639 QMAN_CPDMA_DST_OFFSET);
2640 } else {
2641 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2642 QMAN_LDMA_SIZE_OFFSET);
2643 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2644 QMAN_LDMA_SRC_OFFSET);
2645 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2646 QMAN_LDMA_DST_OFFSET);
2647
2648 /* Configure RAZWI IRQ */
2649 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2650 if (hdev->stop_on_err) {
2651 dma_qm_err_cfg |=
2652 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2653 }
2654 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2655
2656 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2657 lower_32_bits(CFG_BASE +
2658 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2659 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2660 upper_32_bits(CFG_BASE +
2661 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2662 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2663 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2664 dma_id);
2665
2666 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2667 QM_ARB_ERR_MSG_EN_MASK);
2668
2669 /* Increase ARB WDT to support streams architecture */
2670 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2671 GAUDI_ARB_WDT_TIMEOUT);
2672
2673 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2674 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2675 QMAN_INTERNAL_MAKE_TRUSTED);
2676 }
2677
2678 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2679 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2680 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2681 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2682
2683 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2684 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2685 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2686 mtr_base_ws_lo);
2687 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2688 mtr_base_ws_hi);
2689 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2690 so_base_ws_lo);
2691 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2692 so_base_ws_hi);
2693 }
2694}
2695
2696static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2697{
2698 struct gaudi_device *gaudi = hdev->asic_specific;
2699 struct gaudi_internal_qman_info *q;
2700 u64 qman_base_addr;
2701 int i, j, dma_id, internal_q_index;
2702
2703 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2704 return;
2705
2706 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2707 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2708
2709 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2710 /*
2711 * Add the CPU queue in order to get the correct queue
2712 * number, as all internal queues are placed after it
2713 */
2714 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2715
2716 q = &gaudi->internal_qmans[internal_q_index];
2717 qman_base_addr = (u64) q->pq_dma_addr;
2718 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2719 qman_base_addr);
2720 }
2721
2722 /* Initializing lower CP for HBM DMA QMAN */
2723 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2724
2725 gaudi_init_dma_core(hdev, dma_id);
2726
2727 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2728 }
2729
2730 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2731}
2732
2733static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2734 int qman_id, u64 qman_base_addr)
2735{
2736 u32 mtr_base_lo, mtr_base_hi;
2737 u32 so_base_lo, so_base_hi;
2738 u32 q_off, mme_id;
2739 u32 mme_qm_err_cfg;
2740
2741 mtr_base_lo = lower_32_bits(CFG_BASE +
2742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2743 mtr_base_hi = upper_32_bits(CFG_BASE +
2744 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2745 so_base_lo = lower_32_bits(CFG_BASE +
2746 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2747 so_base_hi = upper_32_bits(CFG_BASE +
2748 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2749
2750 q_off = mme_offset + qman_id * 4;
2751
2752 if (qman_id < 4) {
2753 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2754 lower_32_bits(qman_base_addr));
2755 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2756 upper_32_bits(qman_base_addr));
2757
2758 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2759 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2760 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2761
2762 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2763 QMAN_CPDMA_SIZE_OFFSET);
2764 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2765 QMAN_CPDMA_SRC_OFFSET);
2766 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2767 QMAN_CPDMA_DST_OFFSET);
2768 } else {
2769 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2770 QMAN_LDMA_SIZE_OFFSET);
2771 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2772 QMAN_LDMA_SRC_OFFSET);
2773 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2774 QMAN_LDMA_DST_OFFSET);
2775
2776 /* Configure RAZWI IRQ */
2777 mme_id = mme_offset /
2778 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2779
2780 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2781 if (hdev->stop_on_err) {
2782 mme_qm_err_cfg |=
2783 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2784 }
2785 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2786 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2787 lower_32_bits(CFG_BASE +
2788 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2789 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2790 upper_32_bits(CFG_BASE +
2791 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2792 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2793 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2794 mme_id);
2795
2796 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2797 QM_ARB_ERR_MSG_EN_MASK);
2798
2799 /* Increase ARB WDT to support streams architecture */
2800 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2801 GAUDI_ARB_WDT_TIMEOUT);
2802
2803 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2804 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2805 QMAN_INTERNAL_MAKE_TRUSTED);
2806 }
2807
2808 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2809 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2810 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2811 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2812}
2813
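/*
 * Initialize the MME QMANs: streams 0-3 of each master QMAN get a PQ from
 * the internal QMAN buffers, the lower CP (qman_id 4) is initialized
 * without a PQ, and both master QMANs are then enabled.
 */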
2814static void gaudi_init_mme_qmans(struct hl_device *hdev)
2815{
2816 struct gaudi_device *gaudi = hdev->asic_specific;
2817 struct gaudi_internal_qman_info *q;
2818 u64 qman_base_addr;
2819 u32 mme_offset;
2820 int i, internal_q_index;
2821
2822 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2823 return;
2824
2825 /*
2826 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2827 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2828 */
2829
2830 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2831
2832 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2833 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2834 q = &gaudi->internal_qmans[internal_q_index];
2835 qman_base_addr = (u64) q->pq_dma_addr;
2836 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2837 qman_base_addr);
2838 if (i == 3)
2839 mme_offset = 0;
2840 }
2841
2842 /* Initializing lower CP for MME QMANs */
2843 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2844 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2845 gaudi_init_mme_qman(hdev, 0, 4, 0);
2846
2847 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2848 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2849
2850 gaudi->hw_cap_initialized |= HW_CAP_MME;
2851}
2852
2853static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2854 int qman_id, u64 qman_base_addr)
2855{
2856 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2857 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2858 u32 q_off, tpc_id;
2859 u32 tpc_qm_err_cfg;
2860
2861 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2862 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2863 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2864 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2865 so_base_en_lo = lower_32_bits(CFG_BASE +
2866 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2867 so_base_en_hi = upper_32_bits(CFG_BASE +
2868 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2869 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2870 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2871 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2872 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2873 so_base_ws_lo = lower_32_bits(CFG_BASE +
2874 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2875 so_base_ws_hi = upper_32_bits(CFG_BASE +
2876 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2877
2878 q_off = tpc_offset + qman_id * 4;
2879
2880 tpc_id = tpc_offset /
2881 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2882
2883 if (qman_id < 4) {
2884 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2885 lower_32_bits(qman_base_addr));
2886 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2887 upper_32_bits(qman_base_addr));
2888
2889 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2890 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2891 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2892
2893 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2894 QMAN_CPDMA_SIZE_OFFSET);
2895 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2896 QMAN_CPDMA_SRC_OFFSET);
2897 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2898 QMAN_CPDMA_DST_OFFSET);
2899 } else {
2900 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2901 QMAN_LDMA_SIZE_OFFSET);
2902 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2903 QMAN_LDMA_SRC_OFFSET);
2904 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2905 QMAN_LDMA_DST_OFFSET);
2906
2907 /* Configure RAZWI IRQ */
2908 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2909 if (hdev->stop_on_err) {
2910 tpc_qm_err_cfg |=
2911 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2912 }
2913
2914 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2915 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2916 lower_32_bits(CFG_BASE +
2917 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2918 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2919 upper_32_bits(CFG_BASE +
2920 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2921 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2922 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2923 tpc_id);
2924
2925 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2926 QM_ARB_ERR_MSG_EN_MASK);
2927
2928 /* Increase ARB WDT to support streams architecture */
2929 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2930 GAUDI_ARB_WDT_TIMEOUT);
2931
2932 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2933 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2934 QMAN_INTERNAL_MAKE_TRUSTED);
2935 }
2936
2937 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2938 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2939 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2940 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2941
2942 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
2943 if (tpc_id == 6) {
2944 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2945 mtr_base_ws_lo);
2946 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2947 mtr_base_ws_hi);
2948 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2949 so_base_ws_lo);
2950 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2951 so_base_ws_hi);
2952 }
2953}
2954
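/*
 * Initialize all TPC QMANs: four streams plus the lower CP per TPC, set
 * the sync manager base address of each TPC and enable the engines.
 */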
2955static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2956{
2957 struct gaudi_device *gaudi = hdev->asic_specific;
2958 struct gaudi_internal_qman_info *q;
2959 u64 qman_base_addr;
2960 u32 so_base_hi, tpc_offset = 0;
2961 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2962 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2963 int i, tpc_id, internal_q_index;
2964
2965 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2966 return;
2967
2968 so_base_hi = upper_32_bits(CFG_BASE +
2969 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2970
2971 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2972 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2973 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2974 tpc_id * QMAN_STREAMS + i;
2975 q = &gaudi->internal_qmans[internal_q_index];
2976 qman_base_addr = (u64) q->pq_dma_addr;
2977 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2978 qman_base_addr);
2979
2980 if (i == 3) {
2981 /* Initializing lower CP for TPC QMAN */
2982 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2983
2984 /* Enable the QMAN and TPC channel */
2985 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2986 QMAN_TPC_ENABLE);
2987 }
2988 }
2989
2990 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2991 so_base_hi);
2992
2993 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2994
2995 gaudi->hw_cap_initialized |=
2996 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2997 }
2998}
2999
Oded Gabbay3c681572020-11-02 21:10:39 +02003000static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3001 int qman_id, u64 qman_base_addr, int nic_id)
3002{
Ofir Bitton5de406c2020-09-10 10:56:26 +03003003 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3004 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbay3c681572020-11-02 21:10:39 +02003005 u32 q_off;
3006 u32 nic_qm_err_cfg;
3007
Ofir Bitton5de406c2020-09-10 10:56:26 +03003008 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3009 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3010 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003011 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003012 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003013 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003014 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003015 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003016 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3017 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3018 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3019 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3020 so_base_ws_lo = lower_32_bits(CFG_BASE +
3021 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3022 so_base_ws_hi = upper_32_bits(CFG_BASE +
3023 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbay3c681572020-11-02 21:10:39 +02003024
3025 q_off = nic_offset + qman_id * 4;
3026
3027 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3028 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3029
3030 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3031 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3032 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3033
Ofir Bitton5de406c2020-09-10 10:56:26 +03003034 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3035 QMAN_LDMA_SIZE_OFFSET);
3036 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3037 QMAN_LDMA_SRC_OFFSET);
3038 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3039 QMAN_LDMA_DST_OFFSET);
Oded Gabbay3c681572020-11-02 21:10:39 +02003040
Ofir Bitton5de406c2020-09-10 10:56:26 +03003041 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3042 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3043 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3044 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3045
3046 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3047 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3048 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3049 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3050 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
Oded Gabbay3c681572020-11-02 21:10:39 +02003051
3052 if (qman_id == 0) {
3053 /* Configure RAZWI IRQ */
3054 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3055 if (hdev->stop_on_err) {
3056 nic_qm_err_cfg |=
3057 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3058 }
3059
3060 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3061 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3062 lower_32_bits(CFG_BASE +
3063 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3064 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3065 upper_32_bits(CFG_BASE +
3066 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3067 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3068 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3069 nic_id);
3070
3071 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3072 QM_ARB_ERR_MSG_EN_MASK);
3073
3074 /* Increase ARB WDT to support streams architecture */
3075 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3076 GAUDI_ARB_WDT_TIMEOUT);
3077
3078 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3079 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3080 QMAN_INTERNAL_MAKE_TRUSTED);
3081 }
3082}
3083
3084static void gaudi_init_nic_qmans(struct hl_device *hdev)
3085{
3086 struct gaudi_device *gaudi = hdev->asic_specific;
3087 struct gaudi_internal_qman_info *q;
3088 u64 qman_base_addr;
3089 u32 nic_offset = 0;
3090 u32 nic_delta_between_qmans =
3091 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3092 u32 nic_delta_between_nics =
3093 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3094 int i, nic_id, internal_q_index;
3095
3096 if (!hdev->nic_ports_mask)
3097 return;
3098
3099 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3100 return;
3101
3102 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3103
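	/*
	 * Each NIC macro hosts two QMANs (one per port). nic_offset advances
	 * by one QMAN per port; after every odd port it is rewound past the
	 * current macro's two QMANs and moved to the next macro's base.
	 * Masked-out ports are skipped, but the offset bookkeeping is still
	 * performed for them.
	 */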
3104 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3105 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3106 nic_offset += nic_delta_between_qmans;
3107 if (nic_id & 1) {
3108 nic_offset -= (nic_delta_between_qmans * 2);
3109 nic_offset += nic_delta_between_nics;
3110 }
3111 continue;
3112 }
3113
3114 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3115 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3116 nic_id * QMAN_STREAMS + i;
3117 q = &gaudi->internal_qmans[internal_q_index];
3118 qman_base_addr = (u64) q->pq_dma_addr;
3119 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3120 qman_base_addr, nic_id);
3121 }
3122
3123 /* Enable the QMAN */
3124 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3125
3126 nic_offset += nic_delta_between_qmans;
3127 if (nic_id & 1) {
3128 nic_offset -= (nic_delta_between_qmans * 2);
3129 nic_offset += nic_delta_between_nics;
3130 }
3131
3132 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3133 }
3134}
3135
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003136static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3137{
3138 struct gaudi_device *gaudi = hdev->asic_specific;
3139
3140 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3141 return;
3142
3143 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3144 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3145 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3146}
3147
3148static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3149{
3150 struct gaudi_device *gaudi = hdev->asic_specific;
3151
3152 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3153 return;
3154
3155 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3156 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3157 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3158 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3159 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3160}
3161
3162static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3163{
3164 struct gaudi_device *gaudi = hdev->asic_specific;
3165
3166 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3167 return;
3168
3169 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3170 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3171}
3172
3173static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3174{
3175 struct gaudi_device *gaudi = hdev->asic_specific;
3176 u32 tpc_offset = 0;
3177 int tpc_id;
3178
3179 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3180 return;
3181
3182 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3183 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3184 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3185 }
3186}
3187
Oded Gabbay3c681572020-11-02 21:10:39 +02003188static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3189{
3190 struct gaudi_device *gaudi = hdev->asic_specific;
3191 u32 nic_mask, nic_offset = 0;
3192 u32 nic_delta_between_qmans =
3193 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3194 u32 nic_delta_between_nics =
3195 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3196 int nic_id;
3197
3198 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3199 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3200
3201 if (gaudi->hw_cap_initialized & nic_mask)
3202 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3203
3204 nic_offset += nic_delta_between_qmans;
3205 if (nic_id & 1) {
3206 nic_offset -= (nic_delta_between_qmans * 2);
3207 nic_offset += nic_delta_between_nics;
3208 }
3209 }
3210}
3211
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003212static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3213{
3214 struct gaudi_device *gaudi = hdev->asic_specific;
3215
3216 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3217 return;
3218
3219 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3220 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3221 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3222 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3223}
3224
3225static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3226{
3227 struct gaudi_device *gaudi = hdev->asic_specific;
3228
3229 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3230 return;
3231
3232 /* Stop CPs of HBM DMA QMANs */
3233
3234 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3235 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3236 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3237 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3238 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3239}
3240
3241static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3242{
3243 struct gaudi_device *gaudi = hdev->asic_specific;
3244
3245 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3246 return;
3247
3248 /* Stop CPs of MME QMANs */
3249 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3250 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3251}
3252
3253static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3254{
3255 struct gaudi_device *gaudi = hdev->asic_specific;
3256
3257 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3258 return;
3259
3260 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3261 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3262 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3263 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3264 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3265 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3266 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3267 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3268}
3269
Oded Gabbay3c681572020-11-02 21:10:39 +02003270static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3271{
3272 struct gaudi_device *gaudi = hdev->asic_specific;
3273
3274 /* Stop upper CPs of QMANs */
3275
3276 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3277 WREG32(mmNIC0_QM0_GLBL_CFG1,
3278 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3279 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3280 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3281
3282 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3283 WREG32(mmNIC0_QM1_GLBL_CFG1,
3284 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3285 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3286 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3287
3288 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3289 WREG32(mmNIC1_QM0_GLBL_CFG1,
3290 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3291 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3292 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3293
3294 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3295 WREG32(mmNIC1_QM1_GLBL_CFG1,
3296 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3297 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3298 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3299
3300 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3301 WREG32(mmNIC2_QM0_GLBL_CFG1,
3302 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3303 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3304 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3305
3306 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3307 WREG32(mmNIC2_QM1_GLBL_CFG1,
3308 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3309 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3310 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3311
3312 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3313 WREG32(mmNIC3_QM0_GLBL_CFG1,
3314 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3315 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3316 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3317
3318 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3319 WREG32(mmNIC3_QM1_GLBL_CFG1,
3320 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3321 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3322 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3323
3324 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3325 WREG32(mmNIC4_QM0_GLBL_CFG1,
3326 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3327 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3328 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3329
3330 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3331 WREG32(mmNIC4_QM1_GLBL_CFG1,
3332 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3333 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3334 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3335}
3336
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003337static void gaudi_pci_dma_stall(struct hl_device *hdev)
3338{
3339 struct gaudi_device *gaudi = hdev->asic_specific;
3340
3341 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3342 return;
3343
3344 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3345 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3346 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3347}
3348
3349static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3350{
3351 struct gaudi_device *gaudi = hdev->asic_specific;
3352
3353 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3354 return;
3355
3356 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3357 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3358 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3359 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3360 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3361}
3362
3363static void gaudi_mme_stall(struct hl_device *hdev)
3364{
3365 struct gaudi_device *gaudi = hdev->asic_specific;
3366
3367 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3368 return;
3369
3370 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3371 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3372 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3373 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3374 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3375 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3376 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3377 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3378 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3379 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3380 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3381 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3382 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3383 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3384 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3385 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3386 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3387}
3388
3389static void gaudi_tpc_stall(struct hl_device *hdev)
3390{
3391 struct gaudi_device *gaudi = hdev->asic_specific;
3392
3393 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3394 return;
3395
3396 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3397 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3398 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3399 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3400 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3401 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3402 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3403 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3404}
3405
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003406static void gaudi_set_clock_gating(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003407{
3408 struct gaudi_device *gaudi = hdev->asic_specific;
3409 u32 qman_offset;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003410 bool enable;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003411 int i;
3412
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003413	/* If a debug session is in progress, don't enable clock gating,
3414	 * as it may interfere
3415	 */
3416 if (hdev->in_debug)
3417 return;
3418
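	/*
	 * Clock gating is enabled per engine, only if the engine's bit is set
	 * in the user-controlled clock_gating_mask. DMA engines are translated
	 * from logical to physical channel through gaudi_dma_assignment.
	 */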
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003419 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003420 enable = !!(hdev->clock_gating_mask &
3421 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003422
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003423 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003424 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3425 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003426 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003427 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003428 }
3429
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003430 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003431 enable = !!(hdev->clock_gating_mask &
3432 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003433
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003434 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003435 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3436 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003437 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003438 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003439 }
3440
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003441 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3442 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3443 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003444
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003445 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3446 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3447 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003448
3449 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003450 enable = !!(hdev->clock_gating_mask &
3451 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003452
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003453 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003454 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003455 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003456 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003457
3458 qman_offset += TPC_QMAN_OFFSET;
3459 }
3460
3461 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3462}
3463
3464static void gaudi_disable_clock_gating(struct hl_device *hdev)
3465{
3466 struct gaudi_device *gaudi = hdev->asic_specific;
3467 u32 qman_offset;
3468 int i;
3469
3470 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
3471 return;
3472
3473 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3474 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3475 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3476
3477 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3478 }
3479
3480 WREG32(mmMME0_QM_CGM_CFG, 0);
3481 WREG32(mmMME0_QM_CGM_CFG1, 0);
3482 WREG32(mmMME2_QM_CGM_CFG, 0);
3483 WREG32(mmMME2_QM_CGM_CFG1, 0);
3484
3485 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3486 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3487 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3488
3489 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3490 }
3491
3492 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3493}
3494
3495static void gaudi_enable_timestamp(struct hl_device *hdev)
3496{
3497 /* Disable the timestamp counter */
3498 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3499
3500 /* Zero the lower/upper parts of the 64-bit counter */
3501 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3502 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3503
3504 /* Enable the counter */
3505 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3506}
3507
3508static void gaudi_disable_timestamp(struct hl_device *hdev)
3509{
3510 /* Disable the timestamp counter */
3511 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3512}
3513
3514static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3515{
Oded Gabbayc83c4172020-07-05 15:48:34 +03003516 u32 wait_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003517
3518 dev_info(hdev->dev,
3519 "Halting compute engines and disabling interrupts\n");
3520
Oded Gabbayc83c4172020-07-05 15:48:34 +03003521 if (hdev->pldm)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003522 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003523 else
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003524 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003525
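	/*
	 * Quiesce order: stop the QMAN CPs so no new work is fetched, wait,
	 * stall the engine cores themselves, wait again, and only then
	 * disable the QMANs, the timestamp counter and MSI.
	 */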
Oded Gabbay3c681572020-11-02 21:10:39 +02003526 gaudi_stop_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003527 gaudi_stop_mme_qmans(hdev);
3528 gaudi_stop_tpc_qmans(hdev);
3529 gaudi_stop_hbm_dma_qmans(hdev);
3530 gaudi_stop_pci_dma_qmans(hdev);
3531
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003532 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003533
3534 msleep(wait_timeout_ms);
3535
3536 gaudi_pci_dma_stall(hdev);
3537 gaudi_hbm_dma_stall(hdev);
3538 gaudi_tpc_stall(hdev);
3539 gaudi_mme_stall(hdev);
3540
3541 msleep(wait_timeout_ms);
3542
Oded Gabbay3c681572020-11-02 21:10:39 +02003543 gaudi_disable_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003544 gaudi_disable_mme_qmans(hdev);
3545 gaudi_disable_tpc_qmans(hdev);
3546 gaudi_disable_hbm_dma_qmans(hdev);
3547 gaudi_disable_pci_dma_qmans(hdev);
3548
3549 gaudi_disable_timestamp(hdev);
3550
Oded Gabbay12ae3132020-07-03 20:58:23 +03003551 gaudi_disable_msi(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003552}
3553
3554static int gaudi_mmu_init(struct hl_device *hdev)
3555{
3556 struct asic_fixed_properties *prop = &hdev->asic_prop;
3557 struct gaudi_device *gaudi = hdev->asic_specific;
3558 u64 hop0_addr;
3559 int rc, i;
3560
3561 if (!hdev->mmu_enable)
3562 return 0;
3563
3564 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3565 return 0;
3566
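	/*
	 * Program a hop-0 page-table address for every possible ASID. The
	 * per-ASID hop-0 tables are laid out back-to-back starting at
	 * mmu_pgt_addr.
	 */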
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003567 for (i = 0 ; i < prop->max_asid ; i++) {
3568 hop0_addr = prop->mmu_pgt_addr +
3569 (i * prop->mmu_hop_table_size);
3570
3571 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3572 if (rc) {
3573 dev_err(hdev->dev,
3574 "failed to set hop0 addr for asid %d\n", i);
3575 goto err;
3576 }
3577 }
3578
3579 /* init MMU cache manage page */
3580 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3581 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3582
Tomer Tayar644883e2020-07-19 11:00:03 +03003583 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003584
3585 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3586 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3587
3588 WREG32(mmSTLB_HOP_CONFIGURATION,
3589 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3590
Omer Shpigelmancfd41762020-06-03 13:03:35 +03003591 /*
3592 * The H/W expects the first PI after init to be 1. After wraparound
3593 * we'll write 0.
3594 */
3595 gaudi->mmu_cache_inv_pi = 1;
3596
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003597 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3598
3599 return 0;
3600
3601err:
3602 return rc;
3603}
3604
3605static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3606{
3607 void __iomem *dst;
3608
3609 /* HBM scrambler must be initialized before pushing F/W to HBM */
3610 gaudi_init_scrambler_hbm(hdev);
3611
3612 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3613
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003614 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003615}
3616
3617static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3618{
3619 void __iomem *dst;
3620
3621 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3622
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003623 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003624}
3625
farah kassabrieb10b892020-10-14 15:17:36 +03003626static int gaudi_read_device_fw_version(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003627 enum hl_fw_component fwc)
3628{
3629 const char *name;
3630 u32 ver_off;
3631 char *dest;
3632
3633 switch (fwc) {
3634 case FW_COMP_UBOOT:
3635 ver_off = RREG32(mmUBOOT_VER_OFFSET);
3636 dest = hdev->asic_prop.uboot_ver;
3637 name = "U-Boot";
3638 break;
3639 case FW_COMP_PREBOOT:
3640 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3641 dest = hdev->asic_prop.preboot_ver;
3642 name = "Preboot";
3643 break;
3644 default:
3645 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
farah kassabrieb10b892020-10-14 15:17:36 +03003646 return -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003647 }
3648
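	/*
	 * The version offset read above is an absolute SRAM address. Strip
	 * the SRAM base so it can be used as an offset into the SRAM BAR,
	 * and bounds-check it before copying the version string.
	 */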
3649 ver_off &= ~((u32)SRAM_BASE_ADDR);
3650
3651 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3652 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3653 VERSION_MAX_LEN);
3654 } else {
3655 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3656 name, ver_off);
3657 strcpy(dest, "unavailable");
farah kassabrieb10b892020-10-14 15:17:36 +03003658 return -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003659 }
farah kassabrieb10b892020-10-14 15:17:36 +03003660
3661 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003662}
3663
3664static int gaudi_init_cpu(struct hl_device *hdev)
3665{
3666 struct gaudi_device *gaudi = hdev->asic_specific;
3667 int rc;
3668
3669 if (!hdev->cpu_enable)
3670 return 0;
3671
3672 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3673 return 0;
3674
3675 /*
3676	 * The device CPU works with 40-bit addresses.
3677	 * This register extends the addressing to 50 bits.
3678 */
Ofir Bittonc692dec2020-10-04 17:34:37 +03003679 if (hdev->asic_prop.fw_security_disabled)
3680 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003681
3682 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3683 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3684 mmCPU_CMD_STATUS_TO_HOST,
Ofir Bitton323b7262020-10-04 09:09:19 +03003685 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003686 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3687 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3688
3689 if (rc)
3690 return rc;
3691
3692 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3693
3694 return 0;
3695}
3696
3697static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3698{
3699 struct gaudi_device *gaudi = hdev->asic_specific;
3700 struct hl_eq *eq;
3701 u32 status;
3702 struct hl_hw_queue *cpu_pq =
3703 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3704 int err;
3705
3706 if (!hdev->cpu_queues_enable)
3707 return 0;
3708
3709 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3710 return 0;
3711
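	/*
	 * Handshake with the device CPU: publish the PQ/EQ/CQ base addresses
	 * and sizes, signal readiness through CPU_IF_QUEUE_INIT plus a
	 * PI_UPDATE interrupt, then poll until the firmware acknowledges with
	 * PQ_INIT_STATUS_READY_FOR_HOST.
	 */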
3712 eq = &hdev->event_queue;
3713
3714 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3715 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3716
3717 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3718 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3719
3720 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3721 lower_32_bits(hdev->cpu_accessible_dma_address));
3722 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3723 upper_32_bits(hdev->cpu_accessible_dma_address));
3724
3725 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3726 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3727 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3728
3729 /* Used for EQ CI */
3730 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3731
3732 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3733
3734 if (gaudi->multi_msi_mode)
3735 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3736 else
3737 WREG32(mmCPU_IF_QUEUE_INIT,
3738 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3739
3740 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3741
3742 err = hl_poll_timeout(
3743 hdev,
3744 mmCPU_IF_QUEUE_INIT,
3745 status,
3746 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3747 1000,
3748 cpu_timeout);
3749
3750 if (err) {
3751 dev_err(hdev->dev,
Oded Gabbay6138bbe2020-09-04 20:18:16 +03003752 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003753 return -EIO;
3754 }
3755
3756 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3757 return 0;
3758}
3759
3760static void gaudi_pre_hw_init(struct hl_device *hdev)
3761{
3762 /* Perform read from the device to make sure device is up */
3763 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3764
Ofir Bittonc692dec2020-10-04 17:34:37 +03003765 if (hdev->asic_prop.fw_security_disabled) {
3766 /* Set the access through PCI bars (Linux driver only) as
3767 * secured
3768 */
3769 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3770 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3771 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
Oded Gabbay57799ce2020-09-13 15:51:28 +03003772
Ofir Bittonc692dec2020-10-04 17:34:37 +03003773 /* Perform read to flush the waiting writes to ensure
3774 * configuration was set in the device
3775 */
3776 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3777 }
Oded Gabbay57799ce2020-09-13 15:51:28 +03003778
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003779 /*
3780 * Let's mark in the H/W that we have reached this point. We check
3781 * this value in the reset_before_init function to understand whether
3782 * we need to reset the chip before doing H/W init. This register is
3783 * cleared by the H/W upon H/W reset
3784 */
3785 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003786}
3787
3788static int gaudi_hw_init(struct hl_device *hdev)
3789{
3790 int rc;
3791
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003792 gaudi_pre_hw_init(hdev);
3793
3794 gaudi_init_pci_dma_qmans(hdev);
3795
3796 gaudi_init_hbm_dma_qmans(hdev);
3797
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003798 rc = gaudi_init_cpu(hdev);
3799 if (rc) {
3800 dev_err(hdev->dev, "failed to initialize CPU\n");
3801 return rc;
3802 }
3803
3804 /* SRAM scrambler must be initialized after CPU is running from HBM */
3805 gaudi_init_scrambler_sram(hdev);
3806
3807 /* This is here just in case we are working without CPU */
3808 gaudi_init_scrambler_hbm(hdev);
3809
3810 gaudi_init_golden_registers(hdev);
3811
3812 rc = gaudi_mmu_init(hdev);
3813 if (rc)
3814 return rc;
3815
Omer Shpigelman3a3a5bf12020-05-11 10:45:12 +03003816 gaudi_init_security(hdev);
3817
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003818 gaudi_init_mme_qmans(hdev);
3819
3820 gaudi_init_tpc_qmans(hdev);
3821
Oded Gabbay3c681572020-11-02 21:10:39 +02003822 gaudi_init_nic_qmans(hdev);
3823
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003824 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003825
3826 gaudi_enable_timestamp(hdev);
3827
Oded Gabbay3c681572020-11-02 21:10:39 +02003828 /* MSI must be enabled before CPU queues and NIC are initialized */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003829 rc = gaudi_enable_msi(hdev);
3830 if (rc)
3831 goto disable_queues;
3832
3833 /* must be called after MSI was enabled */
3834 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3835 if (rc) {
3836 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3837 rc);
3838 goto disable_msi;
3839 }
3840
3841 /* Perform read from the device to flush all configuration */
3842 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3843
3844 return 0;
3845
3846disable_msi:
3847 gaudi_disable_msi(hdev);
3848disable_queues:
3849 gaudi_disable_mme_qmans(hdev);
3850 gaudi_disable_pci_dma_qmans(hdev);
3851
3852 return rc;
3853}
3854
3855static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3856{
3857 struct gaudi_device *gaudi = hdev->asic_specific;
Igor Grinbergb726a2f2020-10-29 14:06:54 +02003858 u32 status, reset_timeout_ms, cpu_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003859
Oded Gabbay12ae3132020-07-03 20:58:23 +03003860 if (!hard_reset) {
3861 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3862 return;
3863 }
3864
Oded Gabbayc83c4172020-07-05 15:48:34 +03003865 if (hdev->pldm) {
Oded Gabbay12ae3132020-07-03 20:58:23 +03003866 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003867 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3868 } else {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003869 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003870 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3871 }
3872
3873	/* Set the device to handle FLR by H/W, as we are about to put the
3874	 * device CPU into halt mode
3875	 */
Oded Gabbaya63c3fb2020-11-26 18:11:05 +02003876 if (hdev->asic_prop.fw_security_disabled &&
3877 !hdev->asic_prop.hard_reset_done_by_fw)
Ofir Bittonb90c8942020-11-08 12:59:04 +02003878 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
Oded Gabbayc83c4172020-07-05 15:48:34 +03003879 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3880
3881	/* The state of the CPU is unknown, so make sure it is stopped by any
3882	 * means necessary
3883	 */
3884 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
Ofir Bittonb90c8942020-11-08 12:59:04 +02003885
Oded Gabbayc83c4172020-07-05 15:48:34 +03003886 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3887
Oded Gabbaya63c3fb2020-11-26 18:11:05 +02003888 if (hdev->asic_prop.fw_security_disabled &&
3889 !hdev->asic_prop.hard_reset_done_by_fw) {
3890
3891 /* Configure the reset registers. Must be done as early as
3892 * possible in case we fail during H/W initialization
3893 */
3894 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
3895 (CFG_RST_H_DMA_MASK |
3896 CFG_RST_H_MME_MASK |
3897 CFG_RST_H_SM_MASK |
3898 CFG_RST_H_TPC_7_MASK));
3899
3900 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
3901
3902 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
3903 (CFG_RST_H_HBM_MASK |
3904 CFG_RST_H_TPC_7_MASK |
3905 CFG_RST_H_NIC_MASK |
3906 CFG_RST_H_SM_MASK |
3907 CFG_RST_H_DMA_MASK |
3908 CFG_RST_H_MME_MASK |
3909 CFG_RST_H_CPU_MASK |
3910 CFG_RST_H_MMU_MASK));
3911
3912 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
3913 (CFG_RST_L_IF_MASK |
3914 CFG_RST_L_PSOC_MASK |
3915 CFG_RST_L_TPC_MASK));
3916
Ofir Bittonb90c8942020-11-08 12:59:04 +02003917 msleep(cpu_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003918
Ofir Bittonb90c8942020-11-08 12:59:04 +02003919 /* Tell ASIC not to re-initialize PCIe */
3920 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003921
Ofir Bittonb90c8942020-11-08 12:59:04 +02003922 /* Restart BTL/BLR upon hard-reset */
3923 if (hdev->asic_prop.fw_security_disabled)
3924 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003925
Ofir Bittonb90c8942020-11-08 12:59:04 +02003926 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
Oded Gabbay12ae3132020-07-03 20:58:23 +03003927 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
Ofir Bittonb90c8942020-11-08 12:59:04 +02003928 }
3929
Oded Gabbay12ae3132020-07-03 20:58:23 +03003930 dev_info(hdev->dev,
3931 "Issued HARD reset command, going to wait %dms\n",
3932 reset_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003933
3934 /*
3935	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3936	 * itself is in reset. We need to wait until the reset is deasserted
3937 */
3938 msleep(reset_timeout_ms);
3939
3940 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3941 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3942 dev_err(hdev->dev,
3943 "Timeout while waiting for device to reset 0x%x\n",
3944 status);
3945
farah kassabrieb10b892020-10-14 15:17:36 +03003946 if (gaudi) {
3947 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3948 HW_CAP_HBM | HW_CAP_PCI_DMA |
3949 HW_CAP_MME | HW_CAP_TPC_MASK |
3950 HW_CAP_HBM_DMA | HW_CAP_PLL |
3951 HW_CAP_NIC_MASK | HW_CAP_MMU |
3952 HW_CAP_SRAM_SCRAMBLER |
3953 HW_CAP_HBM_SCRAMBLER |
3954 HW_CAP_CLK_GATE);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003955
farah kassabrieb10b892020-10-14 15:17:36 +03003956 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3957 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003958}
3959
3960static int gaudi_suspend(struct hl_device *hdev)
3961{
3962 int rc;
3963
Oded Gabbay2f553422020-08-15 16:28:10 +03003964 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003965 if (rc)
3966 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3967
3968 return rc;
3969}
3970
3971static int gaudi_resume(struct hl_device *hdev)
3972{
3973 return gaudi_init_iatu(hdev);
3974}
3975
3976static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
Hillf Danton0db57532020-08-23 07:32:42 +08003977 void *cpu_addr, dma_addr_t dma_addr, size_t size)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003978{
3979 int rc;
3980
3981 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3982 VM_DONTCOPY | VM_NORESERVE;
3983
Hillf Danton0db57532020-08-23 07:32:42 +08003984 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003985 if (rc)
Hillf Danton0db57532020-08-23 07:32:42 +08003986 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003987
3988 return rc;
3989}
3990
3991static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3992{
3993 struct gaudi_device *gaudi = hdev->asic_specific;
3994 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3995 int dma_id;
3996 bool invalid_queue = false;
3997
3998 switch (hw_queue_id) {
3999 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4000 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4001 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4002 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4003 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4004 break;
4005
4006 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4007 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4008 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4009 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4010 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4011 break;
4012
4013 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4014 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4015 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4016 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4017 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4018 break;
4019
4020 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4021 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4022 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4023 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4024 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4025 break;
4026
4027 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4028 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4029 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4030 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4031 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4032 break;
4033
4034 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004035 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4036 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4037 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4038 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4039 break;
4040
Ofir Bitton0940cab2020-08-31 08:52:56 +03004041 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004042 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4043 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4044 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4045 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4046 break;
4047
Ofir Bitton0940cab2020-08-31 08:52:56 +03004048 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4049 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4050 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4051 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4052 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4053 break;
4054
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004055 case GAUDI_QUEUE_ID_CPU_PQ:
4056 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4057 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4058 else
4059 invalid_queue = true;
4060 break;
4061
4062 case GAUDI_QUEUE_ID_MME_0_0:
4063 db_reg_offset = mmMME2_QM_PQ_PI_0;
4064 break;
4065
4066 case GAUDI_QUEUE_ID_MME_0_1:
4067 db_reg_offset = mmMME2_QM_PQ_PI_1;
4068 break;
4069
4070 case GAUDI_QUEUE_ID_MME_0_2:
4071 db_reg_offset = mmMME2_QM_PQ_PI_2;
4072 break;
4073
4074 case GAUDI_QUEUE_ID_MME_0_3:
4075 db_reg_offset = mmMME2_QM_PQ_PI_3;
4076 break;
4077
4078 case GAUDI_QUEUE_ID_MME_1_0:
4079 db_reg_offset = mmMME0_QM_PQ_PI_0;
4080 break;
4081
4082 case GAUDI_QUEUE_ID_MME_1_1:
4083 db_reg_offset = mmMME0_QM_PQ_PI_1;
4084 break;
4085
4086 case GAUDI_QUEUE_ID_MME_1_2:
4087 db_reg_offset = mmMME0_QM_PQ_PI_2;
4088 break;
4089
4090 case GAUDI_QUEUE_ID_MME_1_3:
4091 db_reg_offset = mmMME0_QM_PQ_PI_3;
4092 break;
4093
4094 case GAUDI_QUEUE_ID_TPC_0_0:
4095 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4096 break;
4097
4098 case GAUDI_QUEUE_ID_TPC_0_1:
4099 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4100 break;
4101
4102 case GAUDI_QUEUE_ID_TPC_0_2:
4103 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4104 break;
4105
4106 case GAUDI_QUEUE_ID_TPC_0_3:
4107 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4108 break;
4109
4110 case GAUDI_QUEUE_ID_TPC_1_0:
4111 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4112 break;
4113
4114 case GAUDI_QUEUE_ID_TPC_1_1:
4115 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4116 break;
4117
4118 case GAUDI_QUEUE_ID_TPC_1_2:
4119 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4120 break;
4121
4122 case GAUDI_QUEUE_ID_TPC_1_3:
4123 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4124 break;
4125
4126 case GAUDI_QUEUE_ID_TPC_2_0:
4127 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4128 break;
4129
4130 case GAUDI_QUEUE_ID_TPC_2_1:
4131 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4132 break;
4133
4134 case GAUDI_QUEUE_ID_TPC_2_2:
4135 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4136 break;
4137
4138 case GAUDI_QUEUE_ID_TPC_2_3:
4139 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4140 break;
4141
4142 case GAUDI_QUEUE_ID_TPC_3_0:
4143 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4144 break;
4145
4146 case GAUDI_QUEUE_ID_TPC_3_1:
4147 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4148 break;
4149
4150 case GAUDI_QUEUE_ID_TPC_3_2:
4151 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4152 break;
4153
4154 case GAUDI_QUEUE_ID_TPC_3_3:
4155 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4156 break;
4157
4158 case GAUDI_QUEUE_ID_TPC_4_0:
4159 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4160 break;
4161
4162 case GAUDI_QUEUE_ID_TPC_4_1:
4163 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4164 break;
4165
4166 case GAUDI_QUEUE_ID_TPC_4_2:
4167 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4168 break;
4169
4170 case GAUDI_QUEUE_ID_TPC_4_3:
4171 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4172 break;
4173
4174 case GAUDI_QUEUE_ID_TPC_5_0:
4175 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4176 break;
4177
4178 case GAUDI_QUEUE_ID_TPC_5_1:
4179 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4180 break;
4181
4182 case GAUDI_QUEUE_ID_TPC_5_2:
4183 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4184 break;
4185
4186 case GAUDI_QUEUE_ID_TPC_5_3:
4187 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4188 break;
4189
4190 case GAUDI_QUEUE_ID_TPC_6_0:
4191 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4192 break;
4193
4194 case GAUDI_QUEUE_ID_TPC_6_1:
4195 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4196 break;
4197
4198 case GAUDI_QUEUE_ID_TPC_6_2:
4199 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4200 break;
4201
4202 case GAUDI_QUEUE_ID_TPC_6_3:
4203 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4204 break;
4205
4206 case GAUDI_QUEUE_ID_TPC_7_0:
4207 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4208 break;
4209
4210 case GAUDI_QUEUE_ID_TPC_7_1:
4211 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4212 break;
4213
4214 case GAUDI_QUEUE_ID_TPC_7_2:
4215 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4216 break;
4217
4218 case GAUDI_QUEUE_ID_TPC_7_3:
4219 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4220 break;
4221
Oded Gabbay3c681572020-11-02 21:10:39 +02004222 case GAUDI_QUEUE_ID_NIC_0_0:
4223 db_reg_offset = mmNIC0_QM0_PQ_PI_0;
4224 break;
4225
4226 case GAUDI_QUEUE_ID_NIC_0_1:
4227 db_reg_offset = mmNIC0_QM0_PQ_PI_1;
4228 break;
4229
4230 case GAUDI_QUEUE_ID_NIC_0_2:
4231 db_reg_offset = mmNIC0_QM0_PQ_PI_2;
4232 break;
4233
4234 case GAUDI_QUEUE_ID_NIC_0_3:
4235 db_reg_offset = mmNIC0_QM0_PQ_PI_3;
4236 break;
4237
4238 case GAUDI_QUEUE_ID_NIC_1_0:
4239 db_reg_offset = mmNIC0_QM1_PQ_PI_0;
4240 break;
4241
4242 case GAUDI_QUEUE_ID_NIC_1_1:
4243 db_reg_offset = mmNIC0_QM1_PQ_PI_1;
4244 break;
4245
4246 case GAUDI_QUEUE_ID_NIC_1_2:
4247 db_reg_offset = mmNIC0_QM1_PQ_PI_2;
4248 break;
4249
4250 case GAUDI_QUEUE_ID_NIC_1_3:
4251 db_reg_offset = mmNIC0_QM1_PQ_PI_3;
4252 break;
4253
4254 case GAUDI_QUEUE_ID_NIC_2_0:
4255 db_reg_offset = mmNIC1_QM0_PQ_PI_0;
4256 break;
4257
4258 case GAUDI_QUEUE_ID_NIC_2_1:
4259 db_reg_offset = mmNIC1_QM0_PQ_PI_1;
4260 break;
4261
4262 case GAUDI_QUEUE_ID_NIC_2_2:
4263 db_reg_offset = mmNIC1_QM0_PQ_PI_2;
4264 break;
4265
4266 case GAUDI_QUEUE_ID_NIC_2_3:
4267 db_reg_offset = mmNIC1_QM0_PQ_PI_3;
4268 break;
4269
4270 case GAUDI_QUEUE_ID_NIC_3_0:
4271 db_reg_offset = mmNIC1_QM1_PQ_PI_0;
4272 break;
4273
4274 case GAUDI_QUEUE_ID_NIC_3_1:
4275 db_reg_offset = mmNIC1_QM1_PQ_PI_1;
4276 break;
4277
4278 case GAUDI_QUEUE_ID_NIC_3_2:
4279 db_reg_offset = mmNIC1_QM1_PQ_PI_2;
4280 break;
4281
4282 case GAUDI_QUEUE_ID_NIC_3_3:
4283 db_reg_offset = mmNIC1_QM1_PQ_PI_3;
4284 break;
4285
4286 case GAUDI_QUEUE_ID_NIC_4_0:
4287 db_reg_offset = mmNIC2_QM0_PQ_PI_0;
4288 break;
4289
4290 case GAUDI_QUEUE_ID_NIC_4_1:
4291 db_reg_offset = mmNIC2_QM0_PQ_PI_1;
4292 break;
4293
4294 case GAUDI_QUEUE_ID_NIC_4_2:
4295 db_reg_offset = mmNIC2_QM0_PQ_PI_2;
4296 break;
4297
4298 case GAUDI_QUEUE_ID_NIC_4_3:
4299 db_reg_offset = mmNIC2_QM0_PQ_PI_3;
4300 break;
4301
4302 case GAUDI_QUEUE_ID_NIC_5_0:
4303 db_reg_offset = mmNIC2_QM1_PQ_PI_0;
4304 break;
4305
4306 case GAUDI_QUEUE_ID_NIC_5_1:
4307 db_reg_offset = mmNIC2_QM1_PQ_PI_1;
4308 break;
4309
4310 case GAUDI_QUEUE_ID_NIC_5_2:
4311 db_reg_offset = mmNIC2_QM1_PQ_PI_2;
4312 break;
4313
4314 case GAUDI_QUEUE_ID_NIC_5_3:
4315 db_reg_offset = mmNIC2_QM1_PQ_PI_3;
4316 break;
4317
4318 case GAUDI_QUEUE_ID_NIC_6_0:
4319 db_reg_offset = mmNIC3_QM0_PQ_PI_0;
4320 break;
4321
4322 case GAUDI_QUEUE_ID_NIC_6_1:
4323 db_reg_offset = mmNIC3_QM0_PQ_PI_1;
4324 break;
4325
4326 case GAUDI_QUEUE_ID_NIC_6_2:
4327 db_reg_offset = mmNIC3_QM0_PQ_PI_2;
4328 break;
4329
4330 case GAUDI_QUEUE_ID_NIC_6_3:
4331 db_reg_offset = mmNIC3_QM0_PQ_PI_3;
4332 break;
4333
4334 case GAUDI_QUEUE_ID_NIC_7_0:
4335 db_reg_offset = mmNIC3_QM1_PQ_PI_0;
4336 break;
4337
4338 case GAUDI_QUEUE_ID_NIC_7_1:
4339 db_reg_offset = mmNIC3_QM1_PQ_PI_1;
4340 break;
4341
4342 case GAUDI_QUEUE_ID_NIC_7_2:
4343 db_reg_offset = mmNIC3_QM1_PQ_PI_2;
4344 break;
4345
4346 case GAUDI_QUEUE_ID_NIC_7_3:
4347 db_reg_offset = mmNIC3_QM1_PQ_PI_3;
4348 break;
4349
4350 case GAUDI_QUEUE_ID_NIC_8_0:
4351 db_reg_offset = mmNIC4_QM0_PQ_PI_0;
4352 break;
4353
4354 case GAUDI_QUEUE_ID_NIC_8_1:
4355 db_reg_offset = mmNIC4_QM0_PQ_PI_1;
4356 break;
4357
4358 case GAUDI_QUEUE_ID_NIC_8_2:
4359 db_reg_offset = mmNIC4_QM0_PQ_PI_2;
4360 break;
4361
4362 case GAUDI_QUEUE_ID_NIC_8_3:
4363 db_reg_offset = mmNIC4_QM0_PQ_PI_3;
4364 break;
4365
4366 case GAUDI_QUEUE_ID_NIC_9_0:
4367 db_reg_offset = mmNIC4_QM1_PQ_PI_0;
4368 break;
4369
4370 case GAUDI_QUEUE_ID_NIC_9_1:
4371 db_reg_offset = mmNIC4_QM1_PQ_PI_1;
4372 break;
4373
4374 case GAUDI_QUEUE_ID_NIC_9_2:
4375 db_reg_offset = mmNIC4_QM1_PQ_PI_2;
4376 break;
4377
4378 case GAUDI_QUEUE_ID_NIC_9_3:
4379 db_reg_offset = mmNIC4_QM1_PQ_PI_3;
4380 break;
4381
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004382 default:
4383 invalid_queue = true;
4384 }
4385
4386 if (invalid_queue) {
4387 /* Should never get here */
4388 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4389 hw_queue_id);
4390 return;
4391 }
4392
4393 db_value = pi;
4394
4395 /* ring the doorbell */
4396 WREG32(db_reg_offset, db_value);
4397
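	/*
	 * For the CPU queue, writing the PI alone is not enough. An interrupt
	 * is also raised towards the device CPU through the GIC distributor
	 * to signal the PI update.
	 */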
4398 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
4399 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
4400 GAUDI_EVENT_PI_UPDATE);
4401}
4402
4403static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4404 struct hl_bd *bd)
4405{
4406 __le64 *pbd = (__le64 *) bd;
4407
4408	/* The QMANs are in host memory, so a simple copy suffices */
4409 pqe[0] = pbd[0];
4410 pqe[1] = pbd[1];
4411}
4412
4413static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4414 dma_addr_t *dma_handle, gfp_t flags)
4415{
4416 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4417 dma_handle, flags);
4418
4419 /* Shift to the device's base physical address of host memory */
4420 if (kernel_addr)
4421 *dma_handle += HOST_PHYS_BASE;
4422
4423 return kernel_addr;
4424}
4425
4426static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4427 void *cpu_addr, dma_addr_t dma_handle)
4428{
4429 /* Cancel the device's base physical address of host memory */
4430 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4431
4432 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4433}
4434
farah kassabri03df1362020-05-06 11:17:38 +03004435static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4436{
4437 struct asic_fixed_properties *prop = &hdev->asic_prop;
4438 u64 cur_addr = DRAM_BASE_ADDR_USER;
4439 u32 val;
4440 u32 chunk_size;
4441 int rc, dma_id;
4442
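	/*
	 * Scrub HBM in chunks of up to 2GB, one chunk per DMA channel: each
	 * DMA core is programmed with a zero source and the MEM_SET bit in
	 * its COMMIT register, then every channel is polled until its busy
	 * bit clears.
	 */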
4443 while (cur_addr < prop->dram_end_address) {
4444 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4445 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4446
4447 chunk_size =
4448 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4449
4450 dev_dbg(hdev->dev,
4451 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4452 cur_addr, cur_addr + chunk_size);
4453
4454 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4455 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4456 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4457 lower_32_bits(cur_addr));
4458 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4459 upper_32_bits(cur_addr));
4460 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4461 chunk_size);
4462 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4463 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4464 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4465
4466 cur_addr += chunk_size;
4467
4468 if (cur_addr == prop->dram_end_address)
4469 break;
4470 }
4471
4472 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4473 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4474
4475 rc = hl_poll_timeout(
4476 hdev,
4477 mmDMA0_CORE_STS0 + dma_offset,
4478 val,
4479 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4480 1000,
4481 HBM_SCRUBBING_TIMEOUT_US);
4482
4483 if (rc) {
4484 dev_err(hdev->dev,
4485 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4486 dma_id);
4487 return -EIO;
4488 }
4489 }
4490 }
4491
4492 return 0;
4493}
4494
4495static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4496{
4497 struct asic_fixed_properties *prop = &hdev->asic_prop;
4498 struct gaudi_device *gaudi = hdev->asic_specific;
4499 u64 idle_mask = 0;
4500 int rc = 0;
4501 u64 val = 0;
4502
4503 if (!hdev->memory_scrub)
4504 return 0;
4505
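	/*
	 * addr == 0 and size == 0 means scrub everything: wait for the device
	 * to become idle (the register poll below is a dummy - the real
	 * condition is is_device_idle()), memset the user SRAM region, then
	 * scrub all of HBM with clock gating temporarily disabled.
	 */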
4506 if (!addr && !size) {
4507 /* Wait till device is idle */
4508 rc = hl_poll_timeout(
4509 hdev,
4510 mmDMA0_CORE_STS0/* dummy */,
4511 val/* dummy */,
4512 (hdev->asic_funcs->is_device_idle(hdev,
4513 &idle_mask, NULL)),
4514 1000,
4515 HBM_SCRUBBING_TIMEOUT_US);
4516 if (rc) {
4517 dev_err(hdev->dev, "waiting for idle timeout\n");
4518 return -EIO;
4519 }
4520
4521 /* Scrub SRAM */
4522 addr = prop->sram_user_base_address;
4523 size = hdev->pldm ? 0x10000 :
4524 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4525 val = 0x7777777777777777ull;
4526
4527 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4528 if (rc) {
4529 dev_err(hdev->dev,
4530 "Failed to clear SRAM in mem scrub all\n");
4531 return rc;
4532 }
4533
4534 mutex_lock(&gaudi->clk_gate_mutex);
4535 hdev->asic_funcs->disable_clock_gating(hdev);
4536
4537 /* Scrub HBM using all DMA channels in parallel */
4538 rc = gaudi_hbm_scrubbing(hdev);
4539 if (rc)
4540 dev_err(hdev->dev,
4541 "Failed to clear HBM in mem scrub all\n");
4542
4543 hdev->asic_funcs->set_clock_gating(hdev);
4544 mutex_unlock(&gaudi->clk_gate_mutex);
4545 }
4546
4547 return rc;
4548}
4549
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004550static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4551 u32 queue_id, dma_addr_t *dma_handle,
4552 u16 *queue_len)
4553{
4554 struct gaudi_device *gaudi = hdev->asic_specific;
4555 struct gaudi_internal_qman_info *q;
4556
4557 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4558 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4559 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4560 return NULL;
4561 }
4562
4563 q = &gaudi->internal_qmans[queue_id];
4564 *dma_handle = q->pq_dma_addr;
4565 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4566
4567 return q->pq_kernel_addr;
4568}
4569
4570static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
Alon Mizrahi439bc472020-11-10 13:49:10 +02004571 u16 len, u32 timeout, u64 *result)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004572{
4573 struct gaudi_device *gaudi = hdev->asic_specific;
4574
4575 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4576 if (result)
4577 *result = 0;
4578 return 0;
4579 }
4580
Oded Gabbay788cacf2020-07-07 17:30:13 +03004581 if (!timeout)
4582 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4583
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004584 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4585 timeout, result);
4586}
4587
4588static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4589{
4590 struct packet_msg_prot *fence_pkt;
4591 dma_addr_t pkt_dma_addr;
4592 u32 fence_val, tmp, timeout_usec;
4593 dma_addr_t fence_dma_addr;
4594 u32 *fence_ptr;
4595 int rc;
4596
4597 if (hdev->pldm)
4598 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4599 else
4600 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4601
4602 fence_val = GAUDI_QMAN0_FENCE_VAL;
4603
4604 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4605 &fence_dma_addr);
4606 if (!fence_ptr) {
4607 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004608 "Failed to allocate memory for H/W queue %d testing\n",
4609 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004610 return -ENOMEM;
4611 }
4612
4613 *fence_ptr = 0;
4614
4615 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4616 sizeof(struct packet_msg_prot),
4617 GFP_KERNEL, &pkt_dma_addr);
4618 if (!fence_pkt) {
4619 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004620 "Failed to allocate packet for H/W queue %d testing\n",
4621 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004622 rc = -ENOMEM;
4623 goto free_fence_ptr;
4624 }
4625
Oded Gabbay65887292020-08-12 11:21:01 +03004626 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4627 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4628 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4629
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004630 fence_pkt->ctl = cpu_to_le32(tmp);
4631 fence_pkt->value = cpu_to_le32(fence_val);
4632 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4633
4634 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4635 sizeof(struct packet_msg_prot),
4636 pkt_dma_addr);
4637 if (rc) {
4638 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004639 "Failed to send fence packet to H/W queue %d\n",
4640 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004641 goto free_pkt;
4642 }
4643
4644 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4645 1000, timeout_usec, true);
4646
4647 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4648
4649 if (rc == -ETIMEDOUT) {
4650 dev_err(hdev->dev,
4651 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4652 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4653 rc = -EIO;
4654 }
4655
4656free_pkt:
4657 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4658 pkt_dma_addr);
4659free_fence_ptr:
4660 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4661 fence_dma_addr);
4662 return rc;
4663}
4664
4665static int gaudi_test_cpu_queue(struct hl_device *hdev)
4666{
4667 struct gaudi_device *gaudi = hdev->asic_specific;
4668
4669	/*
4670	 * Check the queue capability here because send_cpu_message() won't
4671	 * update the result value when the CPU queue capability is missing
4672	 */
4673 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4674 return 0;
4675
4676 return hl_fw_test_cpu_queue(hdev);
4677}
4678
4679static int gaudi_test_queues(struct hl_device *hdev)
4680{
4681 int i, rc, ret_val = 0;
4682
Ofir Bitton3abc99b2020-06-23 14:50:39 +03004683 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004684 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4685 rc = gaudi_test_queue(hdev, i);
4686 if (rc)
4687 ret_val = -EINVAL;
4688 }
4689 }
4690
4691 rc = gaudi_test_cpu_queue(hdev);
4692 if (rc)
4693 ret_val = -EINVAL;
4694
4695 return ret_val;
4696}
4697
4698static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4699 gfp_t mem_flags, dma_addr_t *dma_handle)
4700{
4701 void *kernel_addr;
4702
4703 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4704 return NULL;
4705
4706 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4707
4708 /* Shift to the device's base physical address of host memory */
4709 if (kernel_addr)
4710 *dma_handle += HOST_PHYS_BASE;
4711
4712 return kernel_addr;
4713}
4714
4715static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4716 dma_addr_t dma_addr)
4717{
4718 /* Cancel the device's base physical address of host memory */
4719 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4720
4721 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4722}
4723
4724static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4725 size_t size, dma_addr_t *dma_handle)
4726{
4727 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4728}
4729
4730static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4731 size_t size, void *vaddr)
4732{
4733 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4734}
4735
4736static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4737 int nents, enum dma_data_direction dir)
4738{
4739 struct scatterlist *sg;
4740 int i;
4741
4742 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4743 return -ENOMEM;
4744
4745 /* Shift to the device's base physical address of host memory */
4746 for_each_sg(sgl, sg, nents, i)
4747 sg->dma_address += HOST_PHYS_BASE;
4748
4749 return 0;
4750}
4751
4752static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4753 int nents, enum dma_data_direction dir)
4754{
4755 struct scatterlist *sg;
4756 int i;
4757
4758 /* Cancel the device's base physical address of host memory */
4759 for_each_sg(sgl, sg, nents, i)
4760 sg->dma_address -= HOST_PHYS_BASE;
4761
4762 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4763}
4764
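/*
 * Return the number of bytes of LIN_DMA packets needed to describe the
 * given SG table. Contiguous entries are merged as long as the combined
 * length does not exceed DMA_MAX_TRANSFER_SIZE, so a single packet may
 * cover several entries.
 */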
4765static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4766 struct sg_table *sgt)
4767{
4768 struct scatterlist *sg, *sg_next_iter;
4769 u32 count, dma_desc_cnt;
4770 u64 len, len_next;
4771 dma_addr_t addr, addr_next;
4772
4773 dma_desc_cnt = 0;
4774
4775 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4776
4777 len = sg_dma_len(sg);
4778 addr = sg_dma_address(sg);
4779
4780 if (len == 0)
4781 break;
4782
4783 while ((count + 1) < sgt->nents) {
4784 sg_next_iter = sg_next(sg);
4785 len_next = sg_dma_len(sg_next_iter);
4786 addr_next = sg_dma_address(sg_next_iter);
4787
4788 if (len_next == 0)
4789 break;
4790
4791 if ((addr + len == addr_next) &&
4792 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4793 len += len_next;
4794 count++;
4795 sg = sg_next_iter;
4796 } else {
4797 break;
4798 }
4799 }
4800
4801 dma_desc_cnt++;
4802 }
4803
4804 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4805}
4806
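/*
 * Pin the host buffer referenced by the user DMA packet (unless it is
 * already pinned for this job), DMA-map its SG table and add the size of
 * the LIN_DMA packets that will describe it to the patched CB size.
 */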
4807static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4808 struct hl_cs_parser *parser,
4809 struct packet_lin_dma *user_dma_pkt,
4810 u64 addr, enum dma_data_direction dir)
4811{
4812 struct hl_userptr *userptr;
4813 int rc;
4814
4815 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4816 parser->job_userptr_list, &userptr))
4817 goto already_pinned;
4818
4819 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
4820 if (!userptr)
4821 return -ENOMEM;
4822
4823 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4824 userptr);
4825 if (rc)
4826 goto free_userptr;
4827
4828 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4829
4830 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
4831 userptr->sgt->nents, dir);
4832 if (rc) {
4833 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4834 goto unpin_memory;
4835 }
4836
4837 userptr->dma_mapped = true;
4838 userptr->dir = dir;
4839
4840already_pinned:
4841 parser->patched_cb_size +=
4842 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4843
4844 return 0;
4845
4846unpin_memory:
4847 hl_unpin_host_memory(hdev, userptr);
4848free_userptr:
4849 kfree(userptr);
4850 return rc;
4851}
4852
4853static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4854 struct hl_cs_parser *parser,
4855 struct packet_lin_dma *user_dma_pkt,
4856 bool src_in_host)
4857{
4858 enum dma_data_direction dir;
4859 bool skip_host_mem_pin = false, user_memset;
4860 u64 addr;
4861 int rc = 0;
4862
4863 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4864 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4865 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4866
4867 if (src_in_host) {
4868 if (user_memset)
4869 skip_host_mem_pin = true;
4870
4871 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4872 dir = DMA_TO_DEVICE;
4873 addr = le64_to_cpu(user_dma_pkt->src_addr);
4874 } else {
4875 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4876 dir = DMA_FROM_DEVICE;
4877 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4878 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4879 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4880 }
4881
4882 if (skip_host_mem_pin)
4883 parser->patched_cb_size += sizeof(*user_dma_pkt);
4884 else
4885 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4886 addr, dir);
4887
4888 return rc;
4889}
4890
4891static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4892 struct hl_cs_parser *parser,
4893 struct packet_lin_dma *user_dma_pkt)
4894{
4895 bool src_in_host = false;
4896 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4897 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4898 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4899
4900 dev_dbg(hdev->dev, "DMA packet details:\n");
4901 dev_dbg(hdev->dev, "source == 0x%llx\n",
4902 le64_to_cpu(user_dma_pkt->src_addr));
4903 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4904 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4905
4906 /*
4907 * Special handling for DMA with size 0. Bypass all validations
4908 * because no transactions will be done except for WR_COMP, which
4909 * is not a security issue
4910 */
4911 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4912 parser->patched_cb_size += sizeof(*user_dma_pkt);
4913 return 0;
4914 }
4915
4916 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4917 src_in_host = true;
4918
4919 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4920 src_in_host);
4921}
4922
Oded Gabbay64536ab2020-05-27 12:38:16 +03004923static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
4924 struct hl_cs_parser *parser,
4925 struct packet_load_and_exe *user_pkt)
4926{
4927 u32 cfg;
4928
4929 cfg = le32_to_cpu(user_pkt->cfg);
4930
4931 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
4932 dev_err(hdev->dev,
4933 "User not allowed to use Load and Execute\n");
4934 return -EPERM;
4935 }
4936
4937 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
4938
4939 return 0;
4940}
4941
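/*
 * Walk the packets of the user CB: reject packets the user is not allowed
 * to submit, pre-process LIN_DMA packets (pinning host memory when the MMU
 * is not used) and accumulate the size required for the patched CB.
 */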
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004942static int gaudi_validate_cb(struct hl_device *hdev,
4943 struct hl_cs_parser *parser, bool is_mmu)
4944{
4945 u32 cb_parsed_length = 0;
4946 int rc = 0;
4947
4948 parser->patched_cb_size = 0;
4949
4950	/* user_cb_size is more than 0 so the loop will always be executed */
4951 while (cb_parsed_length < parser->user_cb_size) {
4952 enum packet_id pkt_id;
4953 u16 pkt_size;
4954 struct gaudi_packet *user_pkt;
4955
Arnd Bergmann82948e62020-10-26 17:08:06 +01004956 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004957
4958 pkt_id = (enum packet_id) (
4959 (le64_to_cpu(user_pkt->header) &
4960 PACKET_HEADER_PACKET_ID_MASK) >>
4961 PACKET_HEADER_PACKET_ID_SHIFT);
4962
Ofir Bittonbc75be22020-07-30 14:56:38 +03004963 if (!validate_packet_id(pkt_id)) {
4964 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4965 rc = -EINVAL;
4966 break;
4967 }
4968
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004969 pkt_size = gaudi_packet_sizes[pkt_id];
4970 cb_parsed_length += pkt_size;
4971 if (cb_parsed_length > parser->user_cb_size) {
4972 dev_err(hdev->dev,
4973 "packet 0x%x is out of CB boundary\n", pkt_id);
4974 rc = -EINVAL;
4975 break;
4976 }
4977
4978 switch (pkt_id) {
4979 case PACKET_MSG_PROT:
4980 dev_err(hdev->dev,
4981 "User not allowed to use MSG_PROT\n");
4982 rc = -EPERM;
4983 break;
4984
4985 case PACKET_CP_DMA:
4986 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4987 rc = -EPERM;
4988 break;
4989
4990 case PACKET_STOP:
4991 dev_err(hdev->dev, "User not allowed to use STOP\n");
4992 rc = -EPERM;
4993 break;
4994
Oded Gabbay2edc66e2020-07-03 19:28:54 +03004995 case PACKET_WREG_BULK:
4996 dev_err(hdev->dev,
4997 "User not allowed to use WREG_BULK\n");
4998 rc = -EPERM;
4999 break;
5000
Oded Gabbay64536ab2020-05-27 12:38:16 +03005001 case PACKET_LOAD_AND_EXE:
5002 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5003 (struct packet_load_and_exe *) user_pkt);
5004 break;
5005
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005006 case PACKET_LIN_DMA:
5007 parser->contains_dma_pkt = true;
5008 if (is_mmu)
5009 parser->patched_cb_size += pkt_size;
5010 else
5011 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5012 (struct packet_lin_dma *) user_pkt);
5013 break;
5014
5015 case PACKET_WREG_32:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005016 case PACKET_MSG_LONG:
5017 case PACKET_MSG_SHORT:
5018 case PACKET_REPEAT:
5019 case PACKET_FENCE:
5020 case PACKET_NOP:
5021 case PACKET_ARB_POINT:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005022 parser->patched_cb_size += pkt_size;
5023 break;
5024
5025 default:
5026 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5027 pkt_id);
5028 rc = -EINVAL;
5029 break;
5030 }
5031
5032 if (rc)
5033 break;
5034 }
5035
5036 /*
5037 * The new CB should have space at the end for two MSG_PROT packets:
5038 * 1. A packet that will act as a completion packet
5039 * 2. A packet that will generate MSI-X interrupt
5040 */
5041 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5042
5043 return rc;
5044}
5045
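/*
 * Replace a user LIN_DMA packet that references host memory with a series
 * of LIN_DMA packets, one per (merged) SG entry of the pinned buffer.
 * Only the last generated packet keeps the user's write-completion setting.
 */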
5046static int gaudi_patch_dma_packet(struct hl_device *hdev,
5047 struct hl_cs_parser *parser,
5048 struct packet_lin_dma *user_dma_pkt,
5049 struct packet_lin_dma *new_dma_pkt,
5050 u32 *new_dma_pkt_size)
5051{
5052 struct hl_userptr *userptr;
5053 struct scatterlist *sg, *sg_next_iter;
5054 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5055 u64 len, len_next;
5056 dma_addr_t dma_addr, dma_addr_next;
5057 u64 device_memory_addr, addr;
5058 enum dma_data_direction dir;
5059 struct sg_table *sgt;
5060 bool src_in_host = false;
5061 bool skip_host_mem_pin = false;
5062 bool user_memset;
5063
5064 ctl = le32_to_cpu(user_dma_pkt->ctl);
5065
5066 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5067 src_in_host = true;
5068
5069 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5070 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5071
5072 if (src_in_host) {
5073 addr = le64_to_cpu(user_dma_pkt->src_addr);
5074 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5075 dir = DMA_TO_DEVICE;
5076 if (user_memset)
5077 skip_host_mem_pin = true;
5078 } else {
5079 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5080 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5081 dir = DMA_FROM_DEVICE;
5082 }
5083
5084 if ((!skip_host_mem_pin) &&
5085 (!hl_userptr_is_pinned(hdev, addr,
5086 le32_to_cpu(user_dma_pkt->tsize),
5087 parser->job_userptr_list, &userptr))) {
5088 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5089			addr, le32_to_cpu(user_dma_pkt->tsize));
5090 return -EFAULT;
5091 }
5092
5093 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5094 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5095 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5096 return 0;
5097 }
5098
5099 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5100
5101 sgt = userptr->sgt;
5102 dma_desc_cnt = 0;
5103
5104 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5105 len = sg_dma_len(sg);
5106 dma_addr = sg_dma_address(sg);
5107
5108 if (len == 0)
5109 break;
5110
5111 while ((count + 1) < sgt->nents) {
5112 sg_next_iter = sg_next(sg);
5113 len_next = sg_dma_len(sg_next_iter);
5114 dma_addr_next = sg_dma_address(sg_next_iter);
5115
5116 if (len_next == 0)
5117 break;
5118
5119 if ((dma_addr + len == dma_addr_next) &&
5120 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5121 len += len_next;
5122 count++;
5123 sg = sg_next_iter;
5124 } else {
5125 break;
5126 }
5127 }
5128
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005129 ctl = le32_to_cpu(user_dma_pkt->ctl);
5130 if (likely(dma_desc_cnt))
5131 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5132 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5133 new_dma_pkt->ctl = cpu_to_le32(ctl);
5134 new_dma_pkt->tsize = cpu_to_le32(len);
5135
5136 if (dir == DMA_TO_DEVICE) {
5137 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5138 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5139 } else {
5140 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5141 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5142 }
5143
5144 if (!user_memset)
5145 device_memory_addr += len;
5146 dma_desc_cnt++;
5147 new_dma_pkt++;
5148 }
5149
5150 if (!dma_desc_cnt) {
5151 dev_err(hdev->dev,
5152 "Error of 0 SG entries when patching DMA packet\n");
5153 return -EFAULT;
5154 }
5155
5156 /* Fix the last dma packet - wrcomp must be as user set it */
5157 new_dma_pkt--;
5158 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5159
5160 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5161
5162 return 0;
5163}
5164
5165static int gaudi_patch_cb(struct hl_device *hdev,
5166 struct hl_cs_parser *parser)
5167{
5168 u32 cb_parsed_length = 0;
5169 u32 cb_patched_cur_length = 0;
5170 int rc = 0;
5171
5172	/* user_cb_size is more than 0 so the loop will always be executed */
5173 while (cb_parsed_length < parser->user_cb_size) {
5174 enum packet_id pkt_id;
5175 u16 pkt_size;
5176 u32 new_pkt_size = 0;
5177 struct gaudi_packet *user_pkt, *kernel_pkt;
5178
Arnd Bergmann82948e62020-10-26 17:08:06 +01005179 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5180 kernel_pkt = parser->patched_cb->kernel_address +
5181 cb_patched_cur_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005182
5183 pkt_id = (enum packet_id) (
5184 (le64_to_cpu(user_pkt->header) &
5185 PACKET_HEADER_PACKET_ID_MASK) >>
5186 PACKET_HEADER_PACKET_ID_SHIFT);
5187
Ofir Bittonbc75be22020-07-30 14:56:38 +03005188 if (!validate_packet_id(pkt_id)) {
5189 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5190 rc = -EINVAL;
5191 break;
5192 }
5193
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005194 pkt_size = gaudi_packet_sizes[pkt_id];
5195 cb_parsed_length += pkt_size;
5196 if (cb_parsed_length > parser->user_cb_size) {
5197 dev_err(hdev->dev,
5198 "packet 0x%x is out of CB boundary\n", pkt_id);
5199 rc = -EINVAL;
5200 break;
5201 }
5202
5203 switch (pkt_id) {
5204 case PACKET_LIN_DMA:
5205 rc = gaudi_patch_dma_packet(hdev, parser,
5206 (struct packet_lin_dma *) user_pkt,
5207 (struct packet_lin_dma *) kernel_pkt,
5208 &new_pkt_size);
5209 cb_patched_cur_length += new_pkt_size;
5210 break;
5211
5212 case PACKET_MSG_PROT:
5213 dev_err(hdev->dev,
5214 "User not allowed to use MSG_PROT\n");
5215 rc = -EPERM;
5216 break;
5217
5218 case PACKET_CP_DMA:
5219 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5220 rc = -EPERM;
5221 break;
5222
5223 case PACKET_STOP:
5224 dev_err(hdev->dev, "User not allowed to use STOP\n");
5225 rc = -EPERM;
5226 break;
5227
5228 case PACKET_WREG_32:
5229 case PACKET_WREG_BULK:
5230 case PACKET_MSG_LONG:
5231 case PACKET_MSG_SHORT:
5232 case PACKET_REPEAT:
5233 case PACKET_FENCE:
5234 case PACKET_NOP:
5235 case PACKET_ARB_POINT:
5236 case PACKET_LOAD_AND_EXE:
5237 memcpy(kernel_pkt, user_pkt, pkt_size);
5238 cb_patched_cur_length += pkt_size;
5239 break;
5240
5241 default:
5242 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5243 pkt_id);
5244 rc = -EINVAL;
5245 break;
5246 }
5247
5248 if (rc)
5249 break;
5250 }
5251
5252 return rc;
5253}
5254
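/*
 * With the MMU enabled, the user CB is copied as-is into a kernel-owned
 * patched CB (sized with room for the two trailing MSG_PROT packets) and
 * only validated; LIN_DMA addresses are left untouched for the MMU to
 * translate.
 */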
5255static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5256 struct hl_cs_parser *parser)
5257{
5258 u64 patched_cb_handle;
5259 u32 patched_cb_size;
5260 struct hl_cb *user_cb;
5261 int rc;
5262
5263 /*
5264	 * The new CB should have space at the end for two MSG_PROT packets:
5265	 * 1. A packet that will act as a completion packet
5266	 * 2. A packet that will generate an MSI interrupt
5267 */
5268 parser->patched_cb_size = parser->user_cb_size +
5269 sizeof(struct packet_msg_prot) * 2;
5270
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005271 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005272 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005273 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005274
5275 if (rc) {
5276 dev_err(hdev->dev,
5277 "Failed to allocate patched CB for DMA CS %d\n",
5278 rc);
5279 return rc;
5280 }
5281
5282 patched_cb_handle >>= PAGE_SHIFT;
5283 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5284 (u32) patched_cb_handle);
5285 /* hl_cb_get should never fail here so use kernel WARN */
5286 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5287 (u32) patched_cb_handle);
5288 if (!parser->patched_cb) {
5289 rc = -EFAULT;
5290 goto out;
5291 }
5292
5293 /*
5294 * The check that parser->user_cb_size <= parser->user_cb->size was done
5295 * in validate_queue_index().
5296 */
Arnd Bergmann82948e62020-10-26 17:08:06 +01005297 memcpy(parser->patched_cb->kernel_address,
5298 parser->user_cb->kernel_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005299 parser->user_cb_size);
5300
5301 patched_cb_size = parser->patched_cb_size;
5302
5303 /* Validate patched CB instead of user CB */
5304 user_cb = parser->user_cb;
5305 parser->user_cb = parser->patched_cb;
5306 rc = gaudi_validate_cb(hdev, parser, true);
5307 parser->user_cb = user_cb;
5308
5309 if (rc) {
5310 hl_cb_put(parser->patched_cb);
5311 goto out;
5312 }
5313
5314 if (patched_cb_size != parser->patched_cb_size) {
5315 dev_err(hdev->dev, "user CB size mismatch\n");
5316 hl_cb_put(parser->patched_cb);
5317 rc = -EINVAL;
5318 goto out;
5319 }
5320
5321out:
5322 /*
5323 * Always call cb destroy here because we still have 1 reference
5324	 * to it from the earlier cb_get call. After the job is completed,
5325 * cb_put will release it, but here we want to remove it from the
5326 * idr
5327 */
5328 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5329 patched_cb_handle << PAGE_SHIFT);
5330
5331 return rc;
5332}
5333
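/*
 * Without the MMU, the user CB is first validated (which also pins the
 * referenced host memory and computes the patched size) and then copied
 * packet by packet into the patched CB, rewriting LIN_DMA packets with the
 * DMA-mapped addresses.
 */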
5334static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5335 struct hl_cs_parser *parser)
5336{
5337 u64 patched_cb_handle;
5338 int rc;
5339
5340 rc = gaudi_validate_cb(hdev, parser, false);
5341
5342 if (rc)
5343 goto free_userptr;
5344
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005345 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005346 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005347 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005348 if (rc) {
5349 dev_err(hdev->dev,
5350 "Failed to allocate patched CB for DMA CS %d\n", rc);
5351 goto free_userptr;
5352 }
5353
5354 patched_cb_handle >>= PAGE_SHIFT;
5355 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5356 (u32) patched_cb_handle);
5357 /* hl_cb_get should never fail here so use kernel WARN */
5358 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5359 (u32) patched_cb_handle);
5360 if (!parser->patched_cb) {
5361 rc = -EFAULT;
5362 goto out;
5363 }
5364
5365 rc = gaudi_patch_cb(hdev, parser);
5366
5367 if (rc)
5368 hl_cb_put(parser->patched_cb);
5369
5370out:
5371 /*
5372 * Always call cb destroy here because we still have 1 reference
5373	 * to it from the earlier cb_get call. After the job is completed,
5374 * cb_put will release it, but here we want to remove it from the
5375 * idr
5376 */
5377 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5378 patched_cb_handle << PAGE_SHIFT);
5379
5380free_userptr:
5381 if (rc)
5382 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5383 return rc;
5384}
5385
5386static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5387 struct hl_cs_parser *parser)
5388{
5389 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
Oded Gabbay3c681572020-11-02 21:10:39 +02005390 struct gaudi_device *gaudi = hdev->asic_specific;
5391 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5392 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5393
5394 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5395 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5396 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5397 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5398 parser->hw_queue_id);
5399 return -EINVAL;
5400 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005401
5402 /* For internal queue jobs just check if CB address is valid */
5403 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5404 parser->user_cb_size,
5405 asic_prop->sram_user_base_address,
5406 asic_prop->sram_end_address))
5407 return 0;
5408
5409 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5410 parser->user_cb_size,
5411 asic_prop->dram_user_base_address,
5412 asic_prop->dram_end_address))
5413 return 0;
5414
5415 /* PMMU and HPMMU addresses are equal, check only one of them */
5416 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5417 parser->user_cb_size,
5418 asic_prop->pmmu.start_addr,
5419 asic_prop->pmmu.end_addr))
5420 return 0;
5421
5422 dev_err(hdev->dev,
5423 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5424 parser->user_cb, parser->user_cb_size);
5425
5426 return -EFAULT;
5427}
5428
5429static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5430{
5431 struct gaudi_device *gaudi = hdev->asic_specific;
5432
5433 if (parser->queue_type == QUEUE_TYPE_INT)
5434 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5435
5436 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5437 return gaudi_parse_cb_mmu(hdev, parser);
5438 else
5439 return gaudi_parse_cb_no_mmu(hdev, parser);
5440}
5441
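/*
 * Append two MSG_PROT packets at the end of the CB: the first writes the
 * completion value to the CQ and the second writes to the MSI-X interrupt
 * register (vector 0 is used when multi-MSI mode is disabled).
 */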
5442static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
Arnd Bergmann82948e62020-10-26 17:08:06 +01005443 void *kernel_address, u32 len,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005444 u64 cq_addr, u32 cq_val, u32 msi_vec,
5445 bool eb)
5446{
5447 struct gaudi_device *gaudi = hdev->asic_specific;
5448 struct packet_msg_prot *cq_pkt;
5449 u32 tmp;
5450
Arnd Bergmann82948e62020-10-26 17:08:06 +01005451 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005452
Oded Gabbay65887292020-08-12 11:21:01 +03005453 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5454 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005455
5456 if (eb)
Oded Gabbay65887292020-08-12 11:21:01 +03005457 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005458
5459 cq_pkt->ctl = cpu_to_le32(tmp);
5460 cq_pkt->value = cpu_to_le32(cq_val);
5461 cq_pkt->addr = cpu_to_le64(cq_addr);
5462
5463 cq_pkt++;
5464
Oded Gabbay65887292020-08-12 11:21:01 +03005465 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5466 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005467 cq_pkt->ctl = cpu_to_le32(tmp);
5468 cq_pkt->value = cpu_to_le32(1);
5469
5470 if (!gaudi->multi_msi_mode)
5471 msi_vec = 0;
5472
5473 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5474}
5475
5476static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5477{
5478 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5479}
5480
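/*
 * Fill a device memory range with a value by building a single memset
 * LIN_DMA packet in a kernel CB and submitting it as a driver job on the
 * DMA0 QMAN. The DMA engine's error-cause register is checked before and
 * after the transfer (and cleared if the device is still initializing).
 */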
5481static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5482 u32 size, u64 val)
5483{
5484 struct packet_lin_dma *lin_dma_pkt;
5485 struct hl_cs_job *job;
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005486 u32 cb_size, ctl, err_cause;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005487 struct hl_cb *cb;
5488 int rc;
5489
Ofir Bittona04b7cd2020-07-13 13:36:55 +03005490 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005491 if (!cb)
5492 return -EFAULT;
5493
Arnd Bergmann82948e62020-10-26 17:08:06 +01005494 lin_dma_pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005495 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5496 cb_size = sizeof(*lin_dma_pkt);
5497
Oded Gabbay65887292020-08-12 11:21:01 +03005498 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5499 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5500 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5501 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5502 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5503
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005504 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5505 lin_dma_pkt->src_addr = cpu_to_le64(val);
5506 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5507 lin_dma_pkt->tsize = cpu_to_le32(size);
5508
5509 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5510 if (!job) {
5511 dev_err(hdev->dev, "Failed to allocate a new job\n");
5512 rc = -ENOMEM;
5513 goto release_cb;
5514 }
5515
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005516 /* Verify DMA is OK */
5517 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5518 if (err_cause && !hdev->init_done) {
5519 dev_dbg(hdev->dev,
5520 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5521 err_cause);
5522 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5523 }
5524
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005525 job->id = 0;
5526 job->user_cb = cb;
Tomer Tayarf07486742020-08-02 22:51:31 +03005527 atomic_inc(&job->user_cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005528 job->user_cb_size = cb_size;
5529 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5530 job->patched_cb = job->user_cb;
5531 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5532
5533 hl_debugfs_add_job(hdev, job);
5534
5535 rc = gaudi_send_job_on_qman0(hdev, job);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005536 hl_debugfs_remove_job(hdev, job);
5537 kfree(job);
Tomer Tayarf07486742020-08-02 22:51:31 +03005538 atomic_dec(&cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005539
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005540 /* Verify DMA is OK */
5541 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5542 if (err_cause) {
5543 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5544 rc = -EIO;
5545 if (!hdev->init_done) {
5546 dev_dbg(hdev->dev,
5547 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5548 err_cause);
5549 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5550 }
5551 }
5552
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005553release_cb:
5554 hl_cb_put(cb);
5555 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5556
5557 return rc;
5558}
5559
5560static void gaudi_restore_sm_registers(struct hl_device *hdev)
5561{
5562 int i;
5563
5564 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
5565 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5566 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5567 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5568 }
5569
5570 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
5571 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5572 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5573 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5574 }
5575
5576 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
5577
5578 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
5579 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5580
5581 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
5582
5583 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
5584 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5585}
5586
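/*
 * Restore the per-channel DMA core registers that user code may have
 * modified: point the write-completion address back at the channel's sync
 * object and, for DMA channels 2-7, reset WR_AWUSER_31_11.
 */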
5587static void gaudi_restore_dma_registers(struct hl_device *hdev)
5588{
5589 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5590 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5591 int i;
5592
5593 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5594 u64 sob_addr = CFG_BASE +
5595 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5596 (i * sob_delta);
5597 u32 dma_offset = i * DMA_CORE_OFFSET;
5598
5599 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5600 lower_32_bits(sob_addr));
5601 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5602 upper_32_bits(sob_addr));
5603 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5604
5605 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5606 * modified by the user for SRAM reduction
5607 */
5608 if (i > 1)
5609 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5610 0x00000001);
5611 }
5612}
5613
5614static void gaudi_restore_qm_registers(struct hl_device *hdev)
5615{
5616 u32 qman_offset;
5617 int i;
5618
5619 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5620 qman_offset = i * DMA_QMAN_OFFSET;
5621 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5622 }
5623
5624 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5625 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5626 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5627 }
5628
5629 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5630 qman_offset = i * TPC_QMAN_OFFSET;
5631 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5632 }
Oded Gabbay3c681572020-11-02 21:10:39 +02005633
5634 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5635 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5636 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5637 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5638 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005639}
5640
5641static void gaudi_restore_user_registers(struct hl_device *hdev)
5642{
5643 gaudi_restore_sm_registers(hdev);
5644 gaudi_restore_dma_registers(hdev);
5645 gaudi_restore_qm_registers(hdev);
5646}
5647
5648static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5649{
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005650 gaudi_restore_user_registers(hdev);
5651
5652 return 0;
5653}
5654
5655static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5656{
5657 struct asic_fixed_properties *prop = &hdev->asic_prop;
5658 struct gaudi_device *gaudi = hdev->asic_specific;
5659 u64 addr = prop->mmu_pgt_addr;
5660 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
5661
5662 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5663 return 0;
5664
5665 return gaudi_memset_device_memory(hdev, addr, size, 0);
5666}
5667
5668static void gaudi_restore_phase_topology(struct hl_device *hdev)
5669{
5670
5671}
5672
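/*
 * The debugfs read/write helpers below route the access by address range:
 * configuration space through RREG32/WREG32 (refused while the relevant
 * clock gating is enabled), SRAM through its PCI BAR, HBM by temporarily
 * moving the HBM BAR, and host physical memory directly (only when no
 * IOMMU is present).
 */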
5673static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
5674{
5675 struct asic_fixed_properties *prop = &hdev->asic_prop;
5676 struct gaudi_device *gaudi = hdev->asic_specific;
5677 u64 hbm_bar_addr;
5678 int rc = 0;
5679
5680 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005681
5682 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5683 (hdev->clock_gating_mask &
5684 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5685
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005686 dev_err_ratelimited(hdev->dev,
5687 "Can't read register - clock gating is enabled!\n");
5688 rc = -EFAULT;
5689 } else {
5690 *val = RREG32(addr - CFG_BASE);
5691 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005692
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005693 } else if ((addr >= SRAM_BASE_ADDR) &&
5694 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5695 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
5696 (addr - SRAM_BASE_ADDR));
5697 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5698 u64 bar_base_addr = DRAM_PHYS_BASE +
5699 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5700
5701 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5702 if (hbm_bar_addr != U64_MAX) {
5703 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
5704 (addr - bar_base_addr));
5705
5706 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5707 hbm_bar_addr);
5708 }
5709 if (hbm_bar_addr == U64_MAX)
5710 rc = -EIO;
5711 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5712 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
5713 } else {
5714 rc = -EFAULT;
5715 }
5716
5717 return rc;
5718}
5719
5720static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
5721{
5722 struct asic_fixed_properties *prop = &hdev->asic_prop;
5723 struct gaudi_device *gaudi = hdev->asic_specific;
5724 u64 hbm_bar_addr;
5725 int rc = 0;
5726
5727 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005728
5729 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5730 (hdev->clock_gating_mask &
5731 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5732
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005733 dev_err_ratelimited(hdev->dev,
5734 "Can't write register - clock gating is enabled!\n");
5735 rc = -EFAULT;
5736 } else {
5737 WREG32(addr - CFG_BASE, val);
5738 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005739
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005740 } else if ((addr >= SRAM_BASE_ADDR) &&
5741 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5742 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
5743 (addr - SRAM_BASE_ADDR));
5744 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5745 u64 bar_base_addr = DRAM_PHYS_BASE +
5746 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5747
5748 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5749 if (hbm_bar_addr != U64_MAX) {
5750 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
5751 (addr - bar_base_addr));
5752
5753 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5754 hbm_bar_addr);
5755 }
5756 if (hbm_bar_addr == U64_MAX)
5757 rc = -EIO;
5758 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5759 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5760 } else {
5761 rc = -EFAULT;
5762 }
5763
5764 return rc;
5765}
5766
5767static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
5768{
5769 struct asic_fixed_properties *prop = &hdev->asic_prop;
5770 struct gaudi_device *gaudi = hdev->asic_specific;
5771 u64 hbm_bar_addr;
5772 int rc = 0;
5773
5774 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005775
5776 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5777 (hdev->clock_gating_mask &
5778 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5779
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005780 dev_err_ratelimited(hdev->dev,
5781 "Can't read register - clock gating is enabled!\n");
5782 rc = -EFAULT;
5783 } else {
5784 u32 val_l = RREG32(addr - CFG_BASE);
5785 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
5786
5787 *val = (((u64) val_h) << 32) | val_l;
5788 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005789
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005790 } else if ((addr >= SRAM_BASE_ADDR) &&
5791 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5792 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
5793 (addr - SRAM_BASE_ADDR));
5794 } else if (addr <=
5795 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5796 u64 bar_base_addr = DRAM_PHYS_BASE +
5797 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5798
5799 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5800 if (hbm_bar_addr != U64_MAX) {
5801 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
5802 (addr - bar_base_addr));
5803
5804 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5805 hbm_bar_addr);
5806 }
5807 if (hbm_bar_addr == U64_MAX)
5808 rc = -EIO;
5809 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5810 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
5811 } else {
5812 rc = -EFAULT;
5813 }
5814
5815 return rc;
5816}
5817
5818static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
5819{
5820 struct asic_fixed_properties *prop = &hdev->asic_prop;
5821 struct gaudi_device *gaudi = hdev->asic_specific;
5822 u64 hbm_bar_addr;
5823 int rc = 0;
5824
5825 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005826
5827 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5828 (hdev->clock_gating_mask &
5829 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5830
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005831 dev_err_ratelimited(hdev->dev,
5832 "Can't write register - clock gating is enabled!\n");
5833 rc = -EFAULT;
5834 } else {
5835 WREG32(addr - CFG_BASE, lower_32_bits(val));
5836 WREG32(addr + sizeof(u32) - CFG_BASE,
5837 upper_32_bits(val));
5838 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005839
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005840 } else if ((addr >= SRAM_BASE_ADDR) &&
5841 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5842 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
5843 (addr - SRAM_BASE_ADDR));
5844 } else if (addr <=
5845 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5846 u64 bar_base_addr = DRAM_PHYS_BASE +
5847 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5848
5849 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5850 if (hbm_bar_addr != U64_MAX) {
5851 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5852 (addr - bar_base_addr));
5853
5854 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5855 hbm_bar_addr);
5856 }
5857 if (hbm_bar_addr == U64_MAX)
5858 rc = -EIO;
5859 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5860 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5861 } else {
5862 rc = -EFAULT;
5863 }
5864
5865 return rc;
5866}
5867
5868static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
5869{
5870 struct gaudi_device *gaudi = hdev->asic_specific;
5871
5872 if (hdev->hard_reset_pending)
5873 return U64_MAX;
5874
5875 return readq(hdev->pcie_bar[HBM_BAR_ID] +
5876 (addr - gaudi->hbm_bar_cur_addr));
5877}
5878
5879static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
5880{
5881 struct gaudi_device *gaudi = hdev->asic_specific;
5882
5883 if (hdev->hard_reset_pending)
5884 return;
5885
5886 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5887 (addr - gaudi->hbm_bar_cur_addr));
5888}
5889
Ofir Bitton1137e1e2020-09-30 18:43:52 +03005890void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005891{
5892 /* mask to zero the MMBP and ASID bits */
5893 WREG32_AND(reg, ~0x7FF);
5894 WREG32_OR(reg, asid);
5895}
5896
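/*
 * Program the context's ASID into the non-secure properties / AxUSER
 * registers of all engines (DMA, TPC, MME and the enabled NIC ports) so
 * their transactions carry this ASID. Clock gating is disabled while the
 * registers are written.
 */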
5897static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
5898{
5899 struct gaudi_device *gaudi = hdev->asic_specific;
5900
5901 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5902 return;
5903
5904 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
5905 WARN(1, "asid %u is too big\n", asid);
5906 return;
5907 }
5908
5909 mutex_lock(&gaudi->clk_gate_mutex);
5910
5911 hdev->asic_funcs->disable_clock_gating(hdev);
5912
5913 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5914 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5915 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5916 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5917 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5918
5919 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
5920 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
5921 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
5922 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
5923 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
5924
5925 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5926 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5927 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5928 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5929 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5930
5931 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
5932 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
5933 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
5934 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
5935 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
5936
5937 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
5938 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
5939 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
5940 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
5941 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
5942
5943 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
5944 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
5945 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
5946 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
5947 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
5948
5949 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
5950 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
5951 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
5952 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
5953 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
5954
5955 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
5956 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
5957 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
5958 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
5959 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
5960
5961 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
5962 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
5963 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
5964 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
5965 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
5966 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
5967 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
5968 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
5969
5970 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5971 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5972 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5973 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5974 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5975 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
5976 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
5977
5978 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
5979 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
5980 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
5981 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
5982 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
5983 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
5984 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
5985
5986 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5987 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5988 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5989 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5990 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5991 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
5992 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
5993
5994 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
5995 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
5996 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
5997 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
5998 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
5999 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6000 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6001
6002 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6003 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6004 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6005 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6006 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6007 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6008 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6009
6010 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6011 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6012 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6013 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6014 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6015 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6016 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6017
6018 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6019 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6020 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6021 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6022 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6023 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6024 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6025
6026 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6027 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6028 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6029 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6030 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6031 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6032 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6033
6034 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6035 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6036 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6037 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6038 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6039 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6040 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6041 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6042 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6043 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6044
6045 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6046 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6047 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6048 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6049 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6050 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6051 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6052 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6053 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6054 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6055 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6056 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6057
Oded Gabbay3c681572020-11-02 21:10:39 +02006058 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
6059 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6060 asid);
6061 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6062 asid);
6063 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6064 asid);
6065 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6066 asid);
6067 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6068 asid);
6069 }
6070
6071 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
6072 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6073 asid);
6074 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6075 asid);
6076 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6077 asid);
6078 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6079 asid);
6080 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6081 asid);
6082 }
6083
6084 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
6085 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6086 asid);
6087 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6088 asid);
6089 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6090 asid);
6091 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6092 asid);
6093 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6094 asid);
6095 }
6096
6097 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
6098 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6099 asid);
6100 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6101 asid);
6102 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6103 asid);
6104 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6105 asid);
6106 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6107 asid);
6108 }
6109
6110 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
6111 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6112 asid);
6113 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6114 asid);
6115 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6116 asid);
6117 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6118 asid);
6119 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6120 asid);
6121 }
6122
6123 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
6124 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6125 asid);
6126 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6127 asid);
6128 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6129 asid);
6130 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6131 asid);
6132 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6133 asid);
6134 }
6135
6136 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
6137 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6138 asid);
6139 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6140 asid);
6141 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6142 asid);
6143 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6144 asid);
6145 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6146 asid);
6147 }
6148
6149 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
6150 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6151 asid);
6152 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6153 asid);
6154 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6155 asid);
6156 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6157 asid);
6158 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6159 asid);
6160 }
6161
6162 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
6163 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6164 asid);
6165 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6166 asid);
6167 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6168 asid);
6169 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6170 asid);
6171 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6172 asid);
6173 }
6174
6175 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
6176 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6177 asid);
6178 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6179 asid);
6180 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6181 asid);
6182 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6183 asid);
6184 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6185 asid);
6186 }
6187
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006188 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006189
6190 mutex_unlock(&gaudi->clk_gate_mutex);
6191}
6192
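/*
 * Submit a driver-generated CB on the DMA0 QMAN (only when the device is
 * idle). A MSG_PROT fence packet occupies the last slot of the job's CB,
 * the DMA core protection bit is set around the submission, and the driver
 * polls a host buffer until the fence value is written back.
 */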
6193static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6194 struct hl_cs_job *job)
6195{
6196 struct packet_msg_prot *fence_pkt;
6197 u32 *fence_ptr;
6198 dma_addr_t fence_dma_addr;
6199 struct hl_cb *cb;
6200 u32 tmp, timeout, dma_offset;
6201 int rc;
6202
6203 if (hdev->pldm)
6204 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6205 else
6206 timeout = HL_DEVICE_TIMEOUT_USEC;
6207
6208 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
6209 dev_err_ratelimited(hdev->dev,
6210 "Can't send driver job on QMAN0 because the device is not idle\n");
6211 return -EBUSY;
6212 }
6213
6214 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6215 &fence_dma_addr);
6216 if (!fence_ptr) {
6217 dev_err(hdev->dev,
6218 "Failed to allocate fence memory for QMAN0\n");
6219 return -ENOMEM;
6220 }
6221
6222 cb = job->patched_cb;
6223
Arnd Bergmann82948e62020-10-26 17:08:06 +01006224 fence_pkt = cb->kernel_address +
6225 job->job_cb_size - sizeof(struct packet_msg_prot);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006226
Oded Gabbay65887292020-08-12 11:21:01 +03006227 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6228 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6229 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6230
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006231 fence_pkt->ctl = cpu_to_le32(tmp);
6232 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6233 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6234
6235 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6236
6237 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6238
6239 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6240 job->job_cb_size, cb->bus_address);
6241 if (rc) {
6242 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6243 goto free_fence_ptr;
6244 }
6245
6246 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6247 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6248 timeout, true);
6249
6250 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6251
6252 if (rc == -ETIMEDOUT) {
6253 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6254 goto free_fence_ptr;
6255 }
6256
6257free_fence_ptr:
6258 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6259 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6260
6261 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6262 fence_dma_addr);
6263 return rc;
6264}
6265
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006266static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6267{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006268 if (event_type >= GAUDI_EVENT_SIZE)
6269 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006270
Ofir Bittonebd8d122020-05-10 13:41:28 +03006271 if (!gaudi_irq_map_table[event_type].valid)
6272 goto event_not_supported;
6273
6274	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6275
6276 return;
6277
6278event_not_supported:
6279 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006280}
6281
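/*
 * A RAZWI routing ID of a DMA_IF is shared by two DMA engines. Read the
 * error-cause register of both engines and use the HBW read/write error
 * bit to report which of the two triggered the access (or both candidates
 * when the cause is ambiguous).
 */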
6282static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6283 u32 x_y, bool is_write)
6284{
6285 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6286
6287 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6288 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6289
6290 switch (x_y) {
6291 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6292 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6293 dma_id[0] = 0;
6294 dma_id[1] = 2;
6295 break;
6296 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6297 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6298 dma_id[0] = 1;
6299 dma_id[1] = 3;
6300 break;
6301 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6302 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6303 dma_id[0] = 4;
6304 dma_id[1] = 6;
6305 break;
6306 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6307 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6308 dma_id[0] = 5;
6309 dma_id[1] = 7;
6310 break;
6311 default:
6312 goto unknown_initiator;
6313 }
6314
6315 for (i = 0 ; i < 2 ; i++) {
6316 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6317 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6318 }
6319
6320 switch (x_y) {
6321 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6322 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6323 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6324 return "DMA0";
6325 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6326 return "DMA2";
6327 else
6328 return "DMA0 or DMA2";
6329 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6330 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6331 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6332 return "DMA1";
6333 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6334 return "DMA3";
6335 else
6336 return "DMA1 or DMA3";
6337 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6338 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6339 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6340 return "DMA4";
6341 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6342 return "DMA6";
6343 else
6344 return "DMA4 or DMA6";
6345 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6346 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6347 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6348 return "DMA5";
6349 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6350 return "DMA7";
6351 else
6352 return "DMA5 or DMA7";
6353 }
6354
6355unknown_initiator:
6356 return "unknown initiator";
6357}
6358
6359static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6360 bool is_write)
6361{
6362 u32 val, x_y, axi_id;
6363
6364 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6365 RREG32(mmMMU_UP_RAZWI_READ_ID);
6366 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6367 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6368 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6369 RAZWI_INITIATOR_AXI_ID_SHIFT);
6370
6371 switch (x_y) {
6372 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6373 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6374 return "TPC0";
6375 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6376 return "NIC0";
6377 break;
6378 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6379 return "TPC1";
6380 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6381 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6382 return "MME0";
6383 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6384 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6385 return "MME1";
6386 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6387 return "TPC2";
6388 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6389 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6390 return "TPC3";
6391 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6392 return "PCI";
6393 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6394 return "CPU";
6395 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6396 return "PSOC";
6397 break;
6398 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6399 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6400 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6401 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6402 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6403 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6404 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6405 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6406 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6407 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6408 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6409 return "TPC4";
6410 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6411 return "NIC1";
6412 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6413 return "NIC2";
6414 break;
6415 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6416 return "TPC5";
6417 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6418 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6419 return "MME2";
6420 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6421 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6422 return "MME3";
6423 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6424 return "TPC6";
6425 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6426 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6427 return "TPC7";
6428 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6429 return "NIC4";
6430 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6431 return "NIC5";
6432 break;
6433 default:
6434 break;
6435 }
6436
6437 dev_err(hdev->dev,
6438 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6439 val,
6440 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6441 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6442 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6443 RAZWI_INITIATOR_AXI_ID_MASK);
6444
6445 return "unknown initiator";
6446}
6447
6448static void gaudi_print_razwi_info(struct hl_device *hdev)
6449{
6450 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6451 dev_err_ratelimited(hdev->dev,
6452 "RAZWI event caused by illegal write of %s\n",
6453 gaudi_get_razwi_initiator_name(hdev, true));
6454 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6455 }
6456
6457 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6458 dev_err_ratelimited(hdev->dev,
6459 "RAZWI event caused by illegal read of %s\n",
6460 gaudi_get_razwi_initiator_name(hdev, false));
6461 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6462 }
6463}
6464
6465static void gaudi_print_mmu_error_info(struct hl_device *hdev)
6466{
6467 struct gaudi_device *gaudi = hdev->asic_specific;
6468 u64 addr;
6469 u32 val;
6470
6471 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6472 return;
6473
6474 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6475 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6476 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6477 addr <<= 32;
6478 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6479
6480 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
6481 addr);
6482
6483 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6484 }
6485
6486 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6487 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6488 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6489 addr <<= 32;
6490 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6491
6492 dev_err_ratelimited(hdev->dev,
6493 "MMU access error on va 0x%llx\n", addr);
6494
6495 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6496 }
6497}
6498
6499/*
6500 * +-------------------+------------------------------------------------------+
6501 * | Configuration Reg | Description |
6502 * | Address | |
6503 * +-------------------+------------------------------------------------------+
6504 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6505 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6506 * | |0xF34 memory wrappers 63:32 |
6507 * | |0xF38 memory wrappers 95:64 |
6508 * | |0xF3C memory wrappers 127:96 |
6509 * +-------------------+------------------------------------------------------+
6510 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6511 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6512 * | |0xF44 memory wrappers 63:32 |
6513 * | |0xF48 memory wrappers 95:64 |
6514 * | |0xF4C memory wrappers 127:96 |
6515 * +-------------------+------------------------------------------------------+
6516 */
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006517static int gaudi_extract_ecc_info(struct hl_device *hdev,
6518 struct ecc_info_extract_params *params, u64 *ecc_address,
6519 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006520{
6521 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006522 u32 i, num_mem_regs, reg, err_bit;
6523 u64 err_addr, err_word = 0;
6524 int rc = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006525
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006526 num_mem_regs = params->num_memories / 32 +
6527 ((params->num_memories % 32) ? 1 : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006528
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006529 if (params->block_address >= CFG_BASE)
6530 params->block_address -= CFG_BASE;
6531
6532 if (params->derr)
6533 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006534 else
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006535 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006536
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006537 if (params->disable_clock_gating) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006538 mutex_lock(&gaudi->clk_gate_mutex);
6539 hdev->asic_funcs->disable_clock_gating(hdev);
6540 }
6541
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006542 /* Set invalid wrapper index */
6543 *memory_wrapper_idx = 0xFF;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006544
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006545 /* Iterate through memory wrappers, a single bit must be set */
Dan Carpenterb0353542020-08-05 12:51:05 +03006546 for (i = 0 ; i < num_mem_regs ; i++) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006547 err_addr += i * 4;
6548 err_word = RREG32(err_addr);
6549 if (err_word) {
6550 err_bit = __ffs(err_word);
6551 *memory_wrapper_idx = err_bit + (32 * i);
6552 break;
6553 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006554 }
6555
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006556 if (*memory_wrapper_idx == 0xFF) {
6557 dev_err(hdev->dev, "ECC error information cannot be found\n");
6558 rc = -EINVAL;
6559 goto enable_clk_gate;
6560 }
6561
6562 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6563 *memory_wrapper_idx);
6564
6565 *ecc_address =
6566 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6567 *ecc_syndrom =
6568 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6569
6570 /* Clear error indication */
6571 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6572 if (params->derr)
6573 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6574 else
6575 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6576
6577 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6578
6579enable_clk_gate:
6580 if (params->disable_clock_gating) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006581 hdev->asic_funcs->set_clock_gating(hdev);
Greg Kroah-Hartman65a9bde62020-07-27 11:49:37 +02006582
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006583 mutex_unlock(&gaudi->clk_gate_mutex);
6584 }
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006585
6586 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006587}
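/*
 * Worked example for gaudi_extract_ecc_info() (hypothetical values): for a
 * block with 90 memories, num_mem_regs is 3. If the first status word reads
 * 0 and the second word (offset +4) reads 0x00000400, __ffs() returns 10 and
 * the failing wrapper index is 32 * 1 + 10 = 42. Writing that index to
 * GAUDI_ECC_MEM_SEL_OFFSET latches the matching address and syndrome
 * registers that the function then reads and reports.
 */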
6588
6589static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6590 const char *qm_name,
6591 u64 glbl_sts_addr,
6592 u64 arb_err_addr)
6593{
6594 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6595 char reg_desc[32];
6596
6597 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6598 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6599 glbl_sts_clr_val = 0;
6600 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6601
6602 if (!glbl_sts_val)
6603 continue;
6604
6605 if (i == QMAN_STREAMS)
6606 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6607 else
6608 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6609
6610 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6611 if (glbl_sts_val & BIT(j)) {
6612 dev_err_ratelimited(hdev->dev,
6613 "%s %s. err cause: %s\n",
6614 qm_name, reg_desc,
6615 gaudi_qman_error_cause[j]);
6616 glbl_sts_clr_val |= BIT(j);
6617 }
6618 }
6619
6620		/* Write 1 to clear errors */
6621 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6622 }
6623
6624 arb_err_val = RREG32(arb_err_addr);
6625
6626 if (!arb_err_val)
6627 return;
6628
6629 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6630 if (arb_err_val & BIT(j)) {
6631 dev_err_ratelimited(hdev->dev,
6632 "%s ARB_ERR. err cause: %s\n",
6633 qm_name,
6634 gaudi_qman_arb_error_cause[j]);
6635 }
6636 }
6637}
6638
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006639static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6640 struct hl_eq_ecc_data *ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006641{
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006642 struct ecc_info_extract_params params;
6643 u64 ecc_address = 0, ecc_syndrom = 0;
6644 u8 index, memory_wrapper_idx = 0;
6645 bool extract_info_from_fw;
6646 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006647
6648 switch (event_type) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006649 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
6650 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
6651 extract_info_from_fw = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006652 break;
6653 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
6654 index = event_type - GAUDI_EVENT_TPC0_SERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006655 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
6656 params.num_memories = 90;
6657 params.derr = false;
6658 params.disable_clock_gating = true;
6659 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006660 break;
6661 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
6662 index = event_type - GAUDI_EVENT_TPC0_DERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006663 params.block_address =
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006664 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006665 params.num_memories = 90;
6666 params.derr = true;
6667 params.disable_clock_gating = true;
6668 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006669 break;
6670 case GAUDI_EVENT_MME0_ACC_SERR:
6671 case GAUDI_EVENT_MME1_ACC_SERR:
6672 case GAUDI_EVENT_MME2_ACC_SERR:
6673 case GAUDI_EVENT_MME3_ACC_SERR:
6674 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006675 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6676 params.num_memories = 128;
6677 params.derr = false;
6678 params.disable_clock_gating = true;
6679 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006680 break;
6681 case GAUDI_EVENT_MME0_ACC_DERR:
6682 case GAUDI_EVENT_MME1_ACC_DERR:
6683 case GAUDI_EVENT_MME2_ACC_DERR:
6684 case GAUDI_EVENT_MME3_ACC_DERR:
6685 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006686 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6687 params.num_memories = 128;
6688 params.derr = true;
6689 params.disable_clock_gating = true;
6690 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006691 break;
6692 case GAUDI_EVENT_MME0_SBAB_SERR:
6693 case GAUDI_EVENT_MME1_SBAB_SERR:
6694 case GAUDI_EVENT_MME2_SBAB_SERR:
6695 case GAUDI_EVENT_MME3_SBAB_SERR:
6696 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006697 params.block_address =
6698 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6699 params.num_memories = 33;
6700 params.derr = false;
6701 params.disable_clock_gating = true;
6702 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006703 break;
6704 case GAUDI_EVENT_MME0_SBAB_DERR:
6705 case GAUDI_EVENT_MME1_SBAB_DERR:
6706 case GAUDI_EVENT_MME2_SBAB_DERR:
6707 case GAUDI_EVENT_MME3_SBAB_DERR:
6708 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006709 params.block_address =
6710 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6711 params.num_memories = 33;
6712 params.derr = true;
6713 params.disable_clock_gating = true;
Oded Gabbay652b4442020-11-21 14:35:35 +02006714 extract_info_from_fw = false;
6715 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006716 default:
6717 return;
6718 }
6719
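	/*
	 * Events whose ECC details are delivered by the firmware are taken
	 * from the EQ entry; for the rest, read the block's ECC registers
	 * directly.
	 */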
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006720 if (extract_info_from_fw) {
6721 ecc_address = le64_to_cpu(ecc_data->ecc_address);
6722 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6723 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6724 } else {
6725 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
6726 &ecc_syndrom, &memory_wrapper_idx);
6727 if (rc)
6728 return;
6729 }
6730
6731 dev_err(hdev->dev,
6732 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
6733 ecc_address, ecc_syndrom, memory_wrapper_idx);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006734}
6735
6736static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
6737{
6738 u64 glbl_sts_addr, arb_err_addr;
6739 u8 index;
6740 char desc[32];
6741
6742 switch (event_type) {
6743 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
6744 index = event_type - GAUDI_EVENT_TPC0_QM;
6745 glbl_sts_addr =
6746 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
6747 arb_err_addr =
6748 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
6749 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
6750 break;
6751 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
6752 index = event_type - GAUDI_EVENT_MME0_QM;
6753 glbl_sts_addr =
6754 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
6755 arb_err_addr =
6756 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
6757 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
6758 break;
6759 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
6760 index = event_type - GAUDI_EVENT_DMA0_QM;
6761 glbl_sts_addr =
6762 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
6763 arb_err_addr =
6764 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
6765 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
6766 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02006767 case GAUDI_EVENT_NIC0_QM0:
6768 glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
6769 arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
6770 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
6771 break;
6772 case GAUDI_EVENT_NIC0_QM1:
6773 glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
6774 arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
6775 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
6776 break;
6777 case GAUDI_EVENT_NIC1_QM0:
6778 glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
6779 arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
6780 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
6781 break;
6782 case GAUDI_EVENT_NIC1_QM1:
6783 glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
6784 arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
6785 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
6786 break;
6787 case GAUDI_EVENT_NIC2_QM0:
6788 glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
6789 arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
6790 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
6791 break;
6792 case GAUDI_EVENT_NIC2_QM1:
6793 glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
6794 arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
6795 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
6796 break;
6797 case GAUDI_EVENT_NIC3_QM0:
6798 glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
6799 arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
6800 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
6801 break;
6802 case GAUDI_EVENT_NIC3_QM1:
6803 glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
6804 arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
6805 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
6806 break;
6807 case GAUDI_EVENT_NIC4_QM0:
6808 glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
6809 arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
6810 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
6811 break;
6812 case GAUDI_EVENT_NIC4_QM1:
6813 glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
6814 arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
6815 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
6816 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006817 default:
6818 return;
6819 }
6820
6821 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
6822}
6823
6824static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
6825 bool razwi)
6826{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006827 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006828
6829 gaudi_get_event_desc(event_type, desc, sizeof(desc));
6830 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6831 event_type, desc);
6832
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006833 if (razwi) {
6834 gaudi_print_razwi_info(hdev);
6835 gaudi_print_mmu_error_info(hdev);
6836 }
6837}
6838
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006839static int gaudi_soft_reset_late_init(struct hl_device *hdev)
6840{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006841 struct gaudi_device *gaudi = hdev->asic_specific;
6842
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006843 /* Unmask all IRQs since some could have been received
6844 * during the soft reset
6845 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03006846 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006847}
6848
Ofir Bitton5a2998f2020-10-05 13:44:59 +03006849static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
6850 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006851{
Ofir Bitton5a2998f2020-10-05 13:44:59 +03006852 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
6853 int err = 0;
6854
6855 if (!hdev->asic_prop.fw_security_disabled) {
6856 if (!hbm_ecc_data) {
6857 dev_err(hdev->dev, "No FW ECC data");
6858 return 0;
6859 }
6860
6861 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
6862 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6863 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
6864 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6865 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
6866 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6867 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
6868 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6869 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
6870 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6871 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
6872 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6873 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
6874 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6875
6876 dev_err(hdev->dev,
Oded Gabbay64a9d5a2020-11-21 14:29:25 +02006877 "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6878 device, ch, type, wr_par, rd_par, ca_par, serr, derr);
Ofir Bitton5a2998f2020-10-05 13:44:59 +03006879
6880 err = 1;
6881
6882 return 0;
6883 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006884
6885 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
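	/*
	 * Each channel entry covers two pseudo-channels (pc); the 0x06C/0x07C
	 * words latch their parity/ECC interrupt bits and the 0x060/0x070 and
	 * 0x064/0x074 words hold the ECC information dumped below.
	 */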
6886 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
6887 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
6888 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6889 if (val) {
6890 err = 1;
6891 dev_err(hdev->dev,
6892 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6893 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
6894 (val >> 2) & 0x1, (val >> 3) & 0x1,
6895 (val >> 4) & 0x1);
6896
6897 val2 = RREG32(base + ch * 0x1000 + 0x060);
6898 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03006899 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006900 device, ch * 2,
6901 RREG32(base + ch * 0x1000 + 0x064),
6902 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6903 (val2 & 0xFF0000) >> 16,
6904 (val2 & 0xFF000000) >> 24);
6905 }
6906
6907 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
6908 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6909 if (val) {
6910 err = 1;
6911 dev_err(hdev->dev,
6912 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6913 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
6914 (val >> 2) & 0x1, (val >> 3) & 0x1,
6915 (val >> 4) & 0x1);
6916
6917 val2 = RREG32(base + ch * 0x1000 + 0x070);
6918 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03006919 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006920 device, ch * 2 + 1,
6921 RREG32(base + ch * 0x1000 + 0x074),
6922 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6923 (val2 & 0xFF0000) >> 16,
6924 (val2 & 0xFF000000) >> 24);
6925 }
6926
6927 /* Clear interrupts */
6928 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
6929 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
6930 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
6931 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
6932 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
6933 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
6934 }
6935
6936 val = RREG32(base + 0x8F30);
6937 val2 = RREG32(base + 0x8F34);
6938 if (val | val2) {
6939 err = 1;
6940 dev_err(hdev->dev,
6941 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
6942 device, val, val2);
6943 }
6944 val = RREG32(base + 0x8F40);
6945 val2 = RREG32(base + 0x8F44);
6946 if (val | val2) {
6947 err = 1;
6948 dev_err(hdev->dev,
6949 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
6950 device, val, val2);
6951 }
6952
6953 return err;
6954}
6955
6956static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
6957{
6958 switch (hbm_event_type) {
6959 case GAUDI_EVENT_HBM0_SPI_0:
6960 case GAUDI_EVENT_HBM0_SPI_1:
6961 return 0;
6962 case GAUDI_EVENT_HBM1_SPI_0:
6963 case GAUDI_EVENT_HBM1_SPI_1:
6964 return 1;
6965 case GAUDI_EVENT_HBM2_SPI_0:
6966 case GAUDI_EVENT_HBM2_SPI_1:
6967 return 2;
6968 case GAUDI_EVENT_HBM3_SPI_0:
6969 case GAUDI_EVENT_HBM3_SPI_1:
6970 return 3;
6971 default:
6972 break;
6973 }
6974
6975 /* Should never happen */
6976 return 0;
6977}
6978
6979static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
6980 char *interrupt_name)
6981{
6982 struct gaudi_device *gaudi = hdev->asic_specific;
6983 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
6984 bool soft_reset_required = false;
6985
6986 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
Oded Gabbay6138bbe2020-09-04 20:18:16 +03006987	 * gating, which cannot be done from CPU-CP and is therefore done instead
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006988 * by the driver.
6989 */
6990
6991 mutex_lock(&gaudi->clk_gate_mutex);
6992
6993 hdev->asic_funcs->disable_clock_gating(hdev);
6994
6995 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
6996 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
6997
6998 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
6999 if (tpc_interrupts_cause & BIT(i)) {
7000 dev_err_ratelimited(hdev->dev,
7001 "TPC%d_%s interrupt cause: %s\n",
7002 tpc_id, interrupt_name,
7003 gaudi_tpc_interrupts_cause[i]);
7004 /* If this is QM error, we need to soft-reset */
7005 if (i == 15)
7006 soft_reset_required = true;
7007 }
7008
7009 /* Clear interrupts */
7010 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7011
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007012 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007013
7014 mutex_unlock(&gaudi->clk_gate_mutex);
7015
7016 return soft_reset_required;
7017}
7018
7019static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7020{
7021 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7022}
7023
7024static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7025{
7026 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7027}
7028
7029static void gaudi_print_clk_change_info(struct hl_device *hdev,
7030 u16 event_type)
7031{
7032 switch (event_type) {
7033 case GAUDI_EVENT_FIX_POWER_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007034 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007035 dev_info_ratelimited(hdev->dev,
7036 "Clock throttling due to power consumption\n");
7037 break;
7038
7039 case GAUDI_EVENT_FIX_POWER_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007040 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007041 dev_info_ratelimited(hdev->dev,
7042 "Power envelop is safe, back to optimal clock\n");
7043 break;
7044
7045 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007046 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007047 dev_info_ratelimited(hdev->dev,
7048 "Clock throttling due to overheating\n");
7049 break;
7050
7051 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007052 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007053 dev_info_ratelimited(hdev->dev,
7054 "Thermal envelop is safe, back to optimal clock\n");
7055 break;
7056
7057 default:
7058 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7059 event_type);
7060 break;
7061 }
7062}
7063
7064static void gaudi_handle_eqe(struct hl_device *hdev,
7065 struct hl_eq_entry *eq_entry)
7066{
7067 struct gaudi_device *gaudi = hdev->asic_specific;
7068 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7069 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7070 >> EQ_CTL_EVENT_TYPE_SHIFT);
7071 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03007072 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007073
7074 gaudi->events_stat[event_type]++;
7075 gaudi->events_stat_aggregate[event_type]++;
7076
7077 switch (event_type) {
7078 case GAUDI_EVENT_PCIE_CORE_DERR:
7079 case GAUDI_EVENT_PCIE_IF_DERR:
7080 case GAUDI_EVENT_PCIE_PHY_DERR:
7081 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7082 case GAUDI_EVENT_MME0_ACC_DERR:
7083 case GAUDI_EVENT_MME0_SBAB_DERR:
7084 case GAUDI_EVENT_MME1_ACC_DERR:
7085 case GAUDI_EVENT_MME1_SBAB_DERR:
7086 case GAUDI_EVENT_MME2_ACC_DERR:
7087 case GAUDI_EVENT_MME2_SBAB_DERR:
7088 case GAUDI_EVENT_MME3_ACC_DERR:
7089 case GAUDI_EVENT_MME3_SBAB_DERR:
7090 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7091 fallthrough;
7092 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7093 case GAUDI_EVENT_PSOC_MEM_DERR:
7094 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7095 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7096 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007097 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7098 case GAUDI_EVENT_MMU_DERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007099 gaudi_print_irq_info(hdev, event_type, true);
7100 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7101 if (hdev->hard_reset_on_fw_events)
7102 hl_device_reset(hdev, true, false);
7103 break;
7104
7105 case GAUDI_EVENT_GIC500:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007106 case GAUDI_EVENT_AXI_ECC:
7107 case GAUDI_EVENT_L2_RAM_ECC:
7108 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7109 gaudi_print_irq_info(hdev, event_type, false);
7110 if (hdev->hard_reset_on_fw_events)
7111 hl_device_reset(hdev, true, false);
7112 break;
7113
7114 case GAUDI_EVENT_HBM0_SPI_0:
7115 case GAUDI_EVENT_HBM1_SPI_0:
7116 case GAUDI_EVENT_HBM2_SPI_0:
7117 case GAUDI_EVENT_HBM3_SPI_0:
7118 gaudi_print_irq_info(hdev, event_type, false);
7119 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007120 gaudi_hbm_event_to_dev(event_type),
7121 &eq_entry->hbm_ecc_data);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007122 if (hdev->hard_reset_on_fw_events)
7123 hl_device_reset(hdev, true, false);
7124 break;
7125
7126 case GAUDI_EVENT_HBM0_SPI_1:
7127 case GAUDI_EVENT_HBM1_SPI_1:
7128 case GAUDI_EVENT_HBM2_SPI_1:
7129 case GAUDI_EVENT_HBM3_SPI_1:
7130 gaudi_print_irq_info(hdev, event_type, false);
7131 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007132 gaudi_hbm_event_to_dev(event_type),
7133 &eq_entry->hbm_ecc_data);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007134 break;
7135
7136 case GAUDI_EVENT_TPC0_DEC:
7137 case GAUDI_EVENT_TPC1_DEC:
7138 case GAUDI_EVENT_TPC2_DEC:
7139 case GAUDI_EVENT_TPC3_DEC:
7140 case GAUDI_EVENT_TPC4_DEC:
7141 case GAUDI_EVENT_TPC5_DEC:
7142 case GAUDI_EVENT_TPC6_DEC:
7143 case GAUDI_EVENT_TPC7_DEC:
7144 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007145 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007146 tpc_dec_event_to_tpc_id(event_type),
7147 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03007148 if (reset_required) {
7149 dev_err(hdev->dev, "hard reset required due to %s\n",
7150 gaudi_irq_map_table[event_type].name);
7151
7152 if (hdev->hard_reset_on_fw_events)
7153 hl_device_reset(hdev, true, false);
7154 } else {
7155 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03007156 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007157 break;
7158
7159 case GAUDI_EVENT_TPC0_KRN_ERR:
7160 case GAUDI_EVENT_TPC1_KRN_ERR:
7161 case GAUDI_EVENT_TPC2_KRN_ERR:
7162 case GAUDI_EVENT_TPC3_KRN_ERR:
7163 case GAUDI_EVENT_TPC4_KRN_ERR:
7164 case GAUDI_EVENT_TPC5_KRN_ERR:
7165 case GAUDI_EVENT_TPC6_KRN_ERR:
7166 case GAUDI_EVENT_TPC7_KRN_ERR:
7167 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007168 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007169 tpc_krn_event_to_tpc_id(event_type),
7170 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03007171 if (reset_required) {
7172 dev_err(hdev->dev, "hard reset required due to %s\n",
7173 gaudi_irq_map_table[event_type].name);
7174
7175 if (hdev->hard_reset_on_fw_events)
7176 hl_device_reset(hdev, true, false);
7177 } else {
7178 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03007179 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007180 break;
7181
7182 case GAUDI_EVENT_PCIE_CORE_SERR:
7183 case GAUDI_EVENT_PCIE_IF_SERR:
7184 case GAUDI_EVENT_PCIE_PHY_SERR:
7185 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7186 case GAUDI_EVENT_MME0_ACC_SERR:
7187 case GAUDI_EVENT_MME0_SBAB_SERR:
7188 case GAUDI_EVENT_MME1_ACC_SERR:
7189 case GAUDI_EVENT_MME1_SBAB_SERR:
7190 case GAUDI_EVENT_MME2_ACC_SERR:
7191 case GAUDI_EVENT_MME2_SBAB_SERR:
7192 case GAUDI_EVENT_MME3_ACC_SERR:
7193 case GAUDI_EVENT_MME3_SBAB_SERR:
7194 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7195 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7196 case GAUDI_EVENT_PSOC_MEM_SERR:
7197 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7198 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7199 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7200 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7201 fallthrough;
7202 case GAUDI_EVENT_MMU_SERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007203 gaudi_print_irq_info(hdev, event_type, true);
7204 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7205 hl_fw_unmask_irq(hdev, event_type);
7206 break;
7207
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007208 case GAUDI_EVENT_PCIE_DEC:
7209 case GAUDI_EVENT_MME0_WBC_RSP:
7210 case GAUDI_EVENT_MME0_SBAB0_RSP:
7211 case GAUDI_EVENT_MME1_WBC_RSP:
7212 case GAUDI_EVENT_MME1_SBAB0_RSP:
7213 case GAUDI_EVENT_MME2_WBC_RSP:
7214 case GAUDI_EVENT_MME2_SBAB0_RSP:
7215 case GAUDI_EVENT_MME3_WBC_RSP:
7216 case GAUDI_EVENT_MME3_SBAB0_RSP:
7217 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7218 case GAUDI_EVENT_PSOC_AXI_DEC:
7219 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7220 case GAUDI_EVENT_MMU_PAGE_FAULT:
7221 case GAUDI_EVENT_MMU_WR_PERM:
7222 case GAUDI_EVENT_RAZWI_OR_ADC:
7223 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7224 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7225 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7226 fallthrough;
Oded Gabbay3c681572020-11-02 21:10:39 +02007227 case GAUDI_EVENT_NIC0_QM0:
7228 case GAUDI_EVENT_NIC0_QM1:
7229 case GAUDI_EVENT_NIC1_QM0:
7230 case GAUDI_EVENT_NIC1_QM1:
7231 case GAUDI_EVENT_NIC2_QM0:
7232 case GAUDI_EVENT_NIC2_QM1:
7233 case GAUDI_EVENT_NIC3_QM0:
7234 case GAUDI_EVENT_NIC3_QM1:
7235 case GAUDI_EVENT_NIC4_QM0:
7236 case GAUDI_EVENT_NIC4_QM1:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007237 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7238 gaudi_print_irq_info(hdev, event_type, true);
7239 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007240 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007241 break;
7242
7243 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7244 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007245 if (hdev->hard_reset_on_fw_events)
7246 hl_device_reset(hdev, true, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007247 break;
7248
7249 case GAUDI_EVENT_TPC0_BMON_SPMU:
7250 case GAUDI_EVENT_TPC1_BMON_SPMU:
7251 case GAUDI_EVENT_TPC2_BMON_SPMU:
7252 case GAUDI_EVENT_TPC3_BMON_SPMU:
7253 case GAUDI_EVENT_TPC4_BMON_SPMU:
7254 case GAUDI_EVENT_TPC5_BMON_SPMU:
7255 case GAUDI_EVENT_TPC6_BMON_SPMU:
7256 case GAUDI_EVENT_TPC7_BMON_SPMU:
7257 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7258 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007259 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007260 break;
7261
7262 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7263 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007264 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007265 break;
7266
7267 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7268 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7269 dev_err(hdev->dev,
7270 "Received high temp H/W interrupt %d (cause %d)\n",
7271 event_type, cause);
7272 break;
7273
7274 default:
7275 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7276 event_type);
7277 break;
7278 }
7279}
7280
7281static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
7282 u32 *size)
7283{
7284 struct gaudi_device *gaudi = hdev->asic_specific;
7285
7286 if (aggregate) {
7287 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7288 return gaudi->events_stat_aggregate;
7289 }
7290
7291 *size = (u32) sizeof(gaudi->events_stat);
7292 return gaudi->events_stat;
7293}
7294
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007295static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007296 u32 flags)
7297{
7298 struct gaudi_device *gaudi = hdev->asic_specific;
7299 u32 status, timeout_usec;
7300 int rc;
7301
7302 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7303 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007304 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007305
7306 if (hdev->pldm)
7307 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7308 else
7309 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7310
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007311 mutex_lock(&hdev->mmu_cache_lock);
7312
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007313 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03007314 WREG32(mmSTLB_INV_PS, 3);
7315 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007316 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007317
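	/*
	 * Wait for the MMU to consume the invalidation request (INV_PS
	 * returns to 0).
	 */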
7318 rc = hl_poll_timeout(
7319 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007320 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007321 status,
7322 !status,
7323 1000,
7324 timeout_usec);
7325
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007326 WREG32(mmSTLB_INV_SET, 0);
7327
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007328 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007329
7330 if (rc) {
7331 dev_err_ratelimited(hdev->dev,
7332 "MMU cache invalidation timeout\n");
7333 hl_device_reset(hdev, true, false);
7334 }
7335
7336 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007337}
7338
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007339static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007340 bool is_hard, u32 asid, u64 va, u64 size)
7341{
7342 struct gaudi_device *gaudi = hdev->asic_specific;
7343 u32 status, timeout_usec;
7344 u32 inv_data;
7345 u32 pi;
7346 int rc;
7347
7348 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7349 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007350 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007351
7352 mutex_lock(&hdev->mmu_cache_lock);
7353
7354 if (hdev->pldm)
7355 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7356 else
7357 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7358
7359 /*
7360 * TODO: currently invalidate entire L0 & L1 as in regular hard
7361 * invalidation. Need to apply invalidation of specific cache
7362 * lines with mask of ASID & VA & size.
7363	 * Note that L1 will be flushed entirely in any case.
7364 */
7365
7366 /* L0 & L1 invalidation */
7367 inv_data = RREG32(mmSTLB_CACHE_INV);
7368 /* PI is 8 bit */
7369 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
7370 WREG32(mmSTLB_CACHE_INV,
7371 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
7372
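	/*
	 * Wait until the STLB consumer index catches up with the producer
	 * index written above, i.e. the invalidation has been processed.
	 */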
7373 rc = hl_poll_timeout(
7374 hdev,
7375 mmSTLB_INV_CONSUMER_INDEX,
7376 status,
7377 status == pi,
7378 1000,
7379 timeout_usec);
7380
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007381 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007382
7383 if (rc) {
7384 dev_err_ratelimited(hdev->dev,
7385 "MMU cache invalidation timeout\n");
7386 hl_device_reset(hdev, true, false);
7387 }
7388
7389 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007390}
7391
7392static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
7393 u32 asid, u64 phys_addr)
7394{
7395 u32 status, timeout_usec;
7396 int rc;
7397
7398 if (hdev->pldm)
7399 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7400 else
7401 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7402
7403 WREG32(MMU_ASID, asid);
7404 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7405 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7406 WREG32(MMU_BUSY, 0x80000000);
7407
7408 rc = hl_poll_timeout(
7409 hdev,
7410 MMU_BUSY,
7411 status,
7412 !(status & 0x80000000),
7413 1000,
7414 timeout_usec);
7415
7416 if (rc) {
7417 dev_err(hdev->dev,
7418 "Timeout during MMU hop0 config of asid %d\n", asid);
7419 return rc;
7420 }
7421
7422 return 0;
7423}
7424
7425static int gaudi_send_heartbeat(struct hl_device *hdev)
7426{
7427 struct gaudi_device *gaudi = hdev->asic_specific;
7428
7429 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7430 return 0;
7431
7432 return hl_fw_send_heartbeat(hdev);
7433}
7434
Oded Gabbay2f553422020-08-15 16:28:10 +03007435static int gaudi_cpucp_info_get(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007436{
7437 struct gaudi_device *gaudi = hdev->asic_specific;
7438 struct asic_fixed_properties *prop = &hdev->asic_prop;
7439 int rc;
7440
7441 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7442 return 0;
7443
Alon Mizrahi41478642020-11-17 14:25:14 +02007444 rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007445 if (rc)
7446 return rc;
7447
Oded Gabbay2f553422020-08-15 16:28:10 +03007448 if (!strlen(prop->cpucp_info.card_name))
7449 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007450 CARD_NAME_MAX_LEN);
7451
Oded Gabbay2f553422020-08-15 16:28:10 +03007452 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
Oded Gabbay58361aa2020-08-08 23:34:47 +03007453
Oded Gabbay2f553422020-08-15 16:28:10 +03007454 if (hdev->card_type == cpucp_card_type_pci)
Oded Gabbay58361aa2020-08-08 23:34:47 +03007455 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
Oded Gabbay2f553422020-08-15 16:28:10 +03007456 else if (hdev->card_type == cpucp_card_type_pmc)
Oded Gabbay58361aa2020-08-08 23:34:47 +03007457 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
7458
7459 hdev->max_power = prop->max_power_default;
7460
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007461 return 0;
7462}
7463
farah kassabrid90416c2020-08-12 17:20:13 +03007464static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007465 struct seq_file *s)
7466{
7467 struct gaudi_device *gaudi = hdev->asic_specific;
7468 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
7469 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
Oded Gabbay3c681572020-11-02 21:10:39 +02007470 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007471 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
7472 bool is_idle = true, is_eng_idle, is_slave;
7473 u64 offset;
Oded Gabbay3c681572020-11-02 21:10:39 +02007474 int i, dma_id, port;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007475
7476 mutex_lock(&gaudi->clk_gate_mutex);
7477
7478 hdev->asic_funcs->disable_clock_gating(hdev);
7479
7480 if (s)
7481 seq_puts(s,
7482 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
7483 "--- ------- ------------ ---------- -------------\n");
7484
7485 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
7486 dma_id = gaudi_dma_assignment[i];
7487 offset = dma_id * DMA_QMAN_OFFSET;
7488
7489 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
7490 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
7491 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
7492 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7493 IS_DMA_IDLE(dma_core_sts0);
7494 is_idle &= is_eng_idle;
7495
7496 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03007497 *mask |= ((u64) !is_eng_idle) <<
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007498 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
7499 if (s)
7500 seq_printf(s, fmt, dma_id,
7501 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
7502 qm_cgm_sts, dma_core_sts0);
7503 }
7504
7505 if (s)
7506 seq_puts(s,
7507 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
7508 "--- ------- ------------ ---------- ----------\n");
7509
7510 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
7511 offset = i * TPC_QMAN_OFFSET;
7512 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
7513 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
7514 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
7515 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7516 IS_TPC_IDLE(tpc_cfg_sts);
7517 is_idle &= is_eng_idle;
7518
7519 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03007520 *mask |= ((u64) !is_eng_idle) <<
7521 (GAUDI_ENGINE_ID_TPC_0 + i);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007522 if (s)
7523 seq_printf(s, fmt, i,
7524 is_eng_idle ? "Y" : "N",
7525 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7526 }
7527
7528 if (s)
7529 seq_puts(s,
7530 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
7531 "--- ------- ------------ ---------- -----------\n");
7532
7533 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
7534 offset = i * MME_QMAN_OFFSET;
7535 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
7536 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
7537
7538 /* MME 1 & 3 are slaves, no need to check their QMANs */
7539 is_slave = i % 2;
7540 if (!is_slave) {
7541 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
7542 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
7543 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7544 }
7545
7546 is_idle &= is_eng_idle;
7547
7548 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03007549 *mask |= ((u64) !is_eng_idle) <<
7550 (GAUDI_ENGINE_ID_MME_0 + i);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007551 if (s) {
7552 if (!is_slave)
7553 seq_printf(s, fmt, i,
7554 is_eng_idle ? "Y" : "N",
7555 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
7556 else
7557 seq_printf(s, mme_slave_fmt, i,
7558 is_eng_idle ? "Y" : "N", "-",
7559 "-", mme_arch_sts);
7560 }
7561 }
7562
7563 if (s)
Oded Gabbay3c681572020-11-02 21:10:39 +02007564 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
7565 "--- ------- ------------ ----------\n");
7566
7567 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
7568 offset = i * NIC_MACRO_QMAN_OFFSET;
7569 port = 2 * i;
7570 if (hdev->nic_ports_mask & BIT(port)) {
7571 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7572 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7573 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7574 is_idle &= is_eng_idle;
7575
7576 if (mask)
7577 *mask |= ((u64) !is_eng_idle) <<
7578 (GAUDI_ENGINE_ID_NIC_0 + port);
7579 if (s)
7580 seq_printf(s, nic_fmt, port,
7581 is_eng_idle ? "Y" : "N",
7582 qm_glbl_sts0, qm_cgm_sts);
7583 }
7584
7585 port = 2 * i + 1;
7586 if (hdev->nic_ports_mask & BIT(port)) {
7587 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
7588 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
7589 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7590 is_idle &= is_eng_idle;
7591
7592 if (mask)
7593 *mask |= ((u64) !is_eng_idle) <<
7594 (GAUDI_ENGINE_ID_NIC_0 + port);
7595 if (s)
7596 seq_printf(s, nic_fmt, port,
7597 is_eng_idle ? "Y" : "N",
7598 qm_glbl_sts0, qm_cgm_sts);
7599 }
7600 }
7601
7602 if (s)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007603 seq_puts(s, "\n");
7604
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007605 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007606
7607 mutex_unlock(&gaudi->clk_gate_mutex);
7608
7609 return is_idle;
7610}
7611
7612static void gaudi_hw_queues_lock(struct hl_device *hdev)
7613 __acquires(&gaudi->hw_queues_lock)
7614{
7615 struct gaudi_device *gaudi = hdev->asic_specific;
7616
7617 spin_lock(&gaudi->hw_queues_lock);
7618}
7619
7620static void gaudi_hw_queues_unlock(struct hl_device *hdev)
7621 __releases(&gaudi->hw_queues_lock)
7622{
7623 struct gaudi_device *gaudi = hdev->asic_specific;
7624
7625 spin_unlock(&gaudi->hw_queues_lock);
7626}
7627
7628static u32 gaudi_get_pci_id(struct hl_device *hdev)
7629{
7630 return hdev->pdev->device;
7631}
7632
7633static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
7634 size_t max_size)
7635{
7636 struct gaudi_device *gaudi = hdev->asic_specific;
7637
7638 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7639 return 0;
7640
7641 return hl_fw_get_eeprom_data(hdev, data, max_size);
7642}
7643
7644/*
7645 * This function should be used only during initialization and/or after reset,
7646 * when there are no active users.
7647 */
7648static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
7649 u32 tpc_id)
7650{
7651 struct gaudi_device *gaudi = hdev->asic_specific;
7652 u64 kernel_timeout;
7653 u32 status, offset;
7654 int rc;
7655
7656 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
7657
7658 if (hdev->pldm)
7659 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
7660 else
7661 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
7662
7663 mutex_lock(&gaudi->clk_gate_mutex);
7664
7665 hdev->asic_funcs->disable_clock_gating(hdev);
7666
7667 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
7668 lower_32_bits(tpc_kernel));
7669 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
7670 upper_32_bits(tpc_kernel));
7671
7672 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
7673 lower_32_bits(tpc_kernel));
7674 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
7675 upper_32_bits(tpc_kernel));
7676 /* set a valid LUT pointer, content is of no significance */
7677 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
7678 lower_32_bits(tpc_kernel));
7679 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
7680 upper_32_bits(tpc_kernel));
7681
7682 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
7683 lower_32_bits(CFG_BASE +
7684 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
7685
7686 WREG32(mmTPC0_CFG_TPC_CMD + offset,
7687 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
7688 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
7689 /* wait a bit for the engine to start executing */
7690 usleep_range(1000, 1500);
7691
7692 /* wait until engine has finished executing */
7693 rc = hl_poll_timeout(
7694 hdev,
7695 mmTPC0_CFG_STATUS + offset,
7696 status,
7697 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7698 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7699 1000,
7700 kernel_timeout);
7701
7702 if (rc) {
7703 dev_err(hdev->dev,
7704 "Timeout while waiting for TPC%d icache prefetch\n",
7705 tpc_id);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007706 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007707 mutex_unlock(&gaudi->clk_gate_mutex);
7708 return -EIO;
7709 }
7710
7711 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
7712 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
7713
7714 /* wait a bit for the engine to start executing */
7715 usleep_range(1000, 1500);
7716
7717 /* wait until engine has finished executing */
7718 rc = hl_poll_timeout(
7719 hdev,
7720 mmTPC0_CFG_STATUS + offset,
7721 status,
7722 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7723 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7724 1000,
7725 kernel_timeout);
7726
Oded Gabbay31ac1f12020-08-12 11:28:13 +03007727 if (rc) {
7728 dev_err(hdev->dev,
7729 "Timeout while waiting for TPC%d vector pipe\n",
7730 tpc_id);
7731 hdev->asic_funcs->set_clock_gating(hdev);
7732 mutex_unlock(&gaudi->clk_gate_mutex);
7733 return -EIO;
7734 }
7735
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007736 rc = hl_poll_timeout(
7737 hdev,
7738 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
7739 status,
7740 (status == 0),
7741 1000,
7742 kernel_timeout);
7743
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007744 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007745 mutex_unlock(&gaudi->clk_gate_mutex);
7746
7747 if (rc) {
7748 dev_err(hdev->dev,
7749 "Timeout while waiting for TPC%d kernel to execute\n",
7750 tpc_id);
7751 return -EIO;
7752 }
7753
7754 return 0;
7755}
7756
Ofir Bitton5de406c2020-09-10 10:56:26 +03007757static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
7758 struct hl_ctx *ctx)
7759{
7760 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bitton5de406c2020-09-10 10:56:26 +03007761 int min_alloc_order, rc, collective_cb_size;
7762
7763 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7764 return 0;
7765
7766 hdev->internal_cb_pool_virt_addr =
7767 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
7768 HOST_SPACE_INTERNAL_CB_SZ,
7769 &hdev->internal_cb_pool_dma_addr,
7770 GFP_KERNEL | __GFP_ZERO);
7771
7772 if (!hdev->internal_cb_pool_virt_addr)
7773 return -ENOMEM;
7774
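	/*
	 * Set the pool's minimum allocation order to fit a collective CB:
	 * five MSG_SHORT packets plus one FENCE packet.
	 */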
7775 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
7776 sizeof(struct packet_fence);
7777 min_alloc_order = ilog2(collective_cb_size);
7778
7779 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
7780 if (!hdev->internal_cb_pool) {
7781 dev_err(hdev->dev,
7782 "Failed to create internal CB pool\n");
7783 rc = -ENOMEM;
7784 goto free_internal_cb_pool;
7785 }
7786
7787 rc = gen_pool_add(hdev->internal_cb_pool,
7788 (uintptr_t) hdev->internal_cb_pool_virt_addr,
7789 HOST_SPACE_INTERNAL_CB_SZ, -1);
7790 if (rc) {
7791 dev_err(hdev->dev,
7792 "Failed to add memory to internal CB pool\n");
7793 rc = -EFAULT;
7794 goto destroy_internal_cb_pool;
7795 }
7796
Ofir Bittonbe91b912020-10-22 15:04:10 +03007797 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
Ofir Bitton412c41f2020-11-04 15:18:55 +02007798 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
7799 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
Ofir Bittonbe91b912020-10-22 15:04:10 +03007800
7801 if (!hdev->internal_cb_va_base)
7802 goto destroy_internal_cb_pool;
Ofir Bitton5de406c2020-09-10 10:56:26 +03007803
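	/*
	 * Map the pool into the device MMU at the reserved host VA so the
	 * engines can fetch internal CBs through the MMU.
	 */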
7804 mutex_lock(&ctx->mmu_lock);
Ofir Bitton5c054872020-10-22 15:13:10 +03007805 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
7806 hdev->internal_cb_pool_dma_addr,
7807 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03007808
7809 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
Ofir Bitton5de406c2020-09-10 10:56:26 +03007810 mutex_unlock(&ctx->mmu_lock);
7811
Ofir Bitton5c054872020-10-22 15:13:10 +03007812 if (rc)
7813 goto unreserve_internal_cb_pool;
7814
Ofir Bitton5de406c2020-09-10 10:56:26 +03007815 return 0;
7816
Ofir Bitton5c054872020-10-22 15:13:10 +03007817unreserve_internal_cb_pool:
Ofir Bittonbe91b912020-10-22 15:04:10 +03007818 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
7819 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03007820destroy_internal_cb_pool:
7821 gen_pool_destroy(hdev->internal_cb_pool);
Ofir Bitton5de406c2020-09-10 10:56:26 +03007822free_internal_cb_pool:
7823 hdev->asic_funcs->asic_dma_free_coherent(hdev,
7824 HOST_SPACE_INTERNAL_CB_SZ,
7825 hdev->internal_cb_pool_virt_addr,
7826 hdev->internal_cb_pool_dma_addr);
7827
7828 return rc;
7829}
7830
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
			struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&ctx->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);
}

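/* Per-context init: prepare the MMU registers for the context's ASID and set
 * up the internal CB pool.
 */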
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	/* Gaudi will NEVER support more than a single compute context.
	 * Therefore, don't clear anything unless it is the compute context
	 */
	if (hdev->compute_ctx != ctx)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}

static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}

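/*
 * CB size helpers: a signal CB holds one MSG_SHORT packet (the SOB update)
 * plus two MSG_PROT packets; a wait CB holds four MSG_SHORT packets (monitor
 * setup and arming), a FENCE packet and two MSG_PROT packets.
 */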
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

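/* Append a signal packet to the CB at the given offset: a single MSG_SHORT
 * that increments the sync object by 1 (ADD mode).
 */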
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
			u32 size)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

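/* Emit one monitor-configuration MSG_SHORT packet (W_S MON base) that writes
 * 'value' at the given offset from the monitor base registers.
 */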
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
				u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

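/* Arm monitor 'mon_id': bind it to the sync object group of 'sob_base' with
 * the given mask and target value, using a GREATER-OR-EQUAL comparison.
 */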
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

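/* Emit a FENCE packet on fence ID 2 that waits for a target value of 1 and
 * decrements the counter by 1 once it is reached.
 */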
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

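/*
 * Translate a queue ID to the address of that queue's CP FENCE2 RDATA
 * register. The wait CB points the monitor payload at this address, so the
 * monitor write satisfies the FENCE packet on fence ID 2. For NIC queues the
 * register is derived from the NIC macro/engine index encoded in the ID.
 */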
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}

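/* Emit the three monitor payload packets: low/high payload address set to
 * 'fence_addr' and payload data set to 1.
 */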
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

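/*
 * Append a wait sequence to the CB: three MSG_SHORT packets that point the
 * monitor payload at the queue's fence register, one MSG_SHORT that arms the
 * monitor on the requested sync objects, and a FENCE packet that stalls the
 * queue until the monitor fires.
 */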
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

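/* Reset a sync object: clear its value in the W_S sync manager and
 * re-initialize its kref.
 */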
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
			hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
			0);

	kref_init(&hw_sob->kref);
}

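/* Choose the DMA mask based on what the firmware reported in the
 * NON_RST_FLOPS_0 register: the POWER9 host magic selects a 64-bit mask,
 * otherwise fall back to 48 bits.
 */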
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
					HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

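/* Compose the 64-bit device timestamp from the upper and lower 32-bit
 * counter registers.
 */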
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}