// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

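/* Return true only for packet IDs the driver knows how to parse, i.e. the
 * same set of packets that has an entry in gaudi_packet_sizes above.
 */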
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

enum gaudi_sm_sei_cause {
	GAUDI_SM_SEI_SO_OVERFLOW,
	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
	GAUDI_SM_SEI_AXI_RESPONSE_ERR
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
				struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_schedule_register_memset(struct hl_device *hdev,
		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

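/*
 * gaudi_get_fixed_properties - Fill the fixed ASIC properties
 * @hdev: pointer to hl_device structure
 *
 * Allocate and fill the H/W queues properties array, then set the static
 * DRAM/SRAM/MMU properties and the first SOB/monitor indices reserved for
 * sync-stream and collective operations.
 */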
static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->max_power_default = MAX_POWER_DEFAULT_PCI;

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_msix_interrupt = USHRT_MAX;

	/* disable fw security for now, set it in a later stage */
	prop->fw_security_disabled = true;
	prop->fw_security_status_valid = false;
	prop->hard_reset_done_by_fw = false;

	return 0;
}

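/* Map the SRAM, CFG and HBM BARs (HBM is mapped write-combined) and set the
 * register access pointer (rmmio) to the configuration space inside the
 * CFG BAR.
 */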
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

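/* Move the HBM BAR window to point to @addr inside the device memory.
 * Return the previous window base address, or U64_MAX if re-configuring the
 * inbound PCI region failed.
 */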
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

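/* Configure the PCIe iATU: three inbound regions (SRAM + CFG, SPI flash and
 * HBM) and a single outbound region that points to the host memory.
 */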
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

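/* Early initialization: fetch the fixed properties, validate the PCI BAR
 * sizes, initialize the PCI layer and read the preboot status. If the H/W
 * state is dirty, a hard reset is performed before continuing.
 */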
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300628static int gaudi_early_init(struct hl_device *hdev)
629{
630 struct asic_fixed_properties *prop = &hdev->asic_prop;
631 struct pci_dev *pdev = hdev->pdev;
632 int rc;
633
634 rc = gaudi_get_fixed_properties(hdev);
635 if (rc) {
636 dev_err(hdev->dev, "Failed to get fixed properties\n");
637 return rc;
638 }
639
640 /* Check BAR sizes */
641 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
642 dev_err(hdev->dev,
643 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
644 SRAM_BAR_ID,
645 (unsigned long long) pci_resource_len(pdev,
646 SRAM_BAR_ID),
647 SRAM_BAR_SIZE);
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300648 rc = -ENODEV;
649 goto free_queue_props;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300650 }
651
652 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
653 dev_err(hdev->dev,
654 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
655 CFG_BAR_ID,
656 (unsigned long long) pci_resource_len(pdev,
657 CFG_BAR_ID),
658 CFG_BAR_SIZE);
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300659 rc = -ENODEV;
660 goto free_queue_props;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300661 }
662
663 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
664
Ofir Bittond1ddd902020-10-19 17:04:20 +0300665 rc = hl_pci_init(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300666 if (rc)
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300667 goto free_queue_props;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300668
Ofir Bittond1ddd902020-10-19 17:04:20 +0300669 /* Before continuing in the initialization, we need to read the preboot
670 * version to determine whether we run with a security-enabled firmware
671 */
672 rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
673 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
674 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
675 if (rc) {
676 if (hdev->reset_on_preboot_fail)
677 hdev->asic_funcs->hw_fini(hdev, true);
678 goto pci_fini;
679 }
680
Ofir Bitton9c9013c2020-12-01 10:39:54 +0200681 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
682 dev_info(hdev->dev,
683 "H/W state is dirty, must reset before initializing\n");
684 hdev->asic_funcs->hw_fini(hdev, true);
685 }
686
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300687 return 0;
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300688
Ofir Bittond1ddd902020-10-19 17:04:20 +0300689pci_fini:
690 hl_pci_fini(hdev);
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300691free_queue_props:
692 kfree(hdev->asic_prop.hw_queues_props);
693 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300694}
695
696static int gaudi_early_fini(struct hl_device *hdev)
697{
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300698 kfree(hdev->asic_prop.hw_queues_props);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300699 hl_pci_fini(hdev);
700
701 return 0;
702}
703
704/**
705 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
706 *
707 * @hdev: pointer to hl_device structure
708 *
709 */
Ofir Bitton1cbca892020-10-05 11:36:00 +0300710static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300711{
712 struct asic_fixed_properties *prop = &hdev->asic_prop;
Alon Mizrahi65854892020-11-19 16:34:19 +0200713 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
714 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
Ofir Bitton1cbca892020-10-05 11:36:00 +0300715 int rc;
716
Alon Mizrahi65854892020-11-19 16:34:19 +0200717 if (hdev->asic_prop.fw_security_disabled) {
718 /* Backward compatibility */
719 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
720 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
721 nr = RREG32(mmPSOC_CPU_PLL_NR);
722 nf = RREG32(mmPSOC_CPU_PLL_NF);
723 od = RREG32(mmPSOC_CPU_PLL_OD);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300724
Alon Mizrahi65854892020-11-19 16:34:19 +0200725 if (div_sel == DIV_SEL_REF_CLK ||
726 div_sel == DIV_SEL_DIVIDED_REF) {
727 if (div_sel == DIV_SEL_REF_CLK)
728 freq = PLL_REF_CLK;
729 else
730 freq = PLL_REF_CLK / (div_fctr + 1);
731 } else if (div_sel == DIV_SEL_PLL_CLK ||
732 div_sel == DIV_SEL_DIVIDED_PLL) {
733 pll_clk = PLL_REF_CLK * (nf + 1) /
734 ((nr + 1) * (od + 1));
735 if (div_sel == DIV_SEL_PLL_CLK)
736 freq = pll_clk;
737 else
738 freq = pll_clk / (div_fctr + 1);
739 } else {
740 dev_warn(hdev->dev,
741 "Received invalid div select value: %d",
742 div_sel);
743 freq = 0;
744 }
745 } else {
746 rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
747
748 if (rc)
749 return rc;
750
751 freq = pll_freq_arr[2];
752 }
753
754 prop->psoc_timestamp_frequency = freq;
755 prop->psoc_pci_pll_nr = nr;
756 prop->psoc_pci_pll_nf = nf;
757 prop->psoc_pci_pll_od = od;
758 prop->psoc_pci_pll_div_factor = div_fctr;
Ofir Bitton1cbca892020-10-05 11:36:00 +0300759
760 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300761}
762
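/* Build a LIN_DMA packet that copies the TPC kernel from host memory
 * (tpc_kernel_src_addr) to the SRAM user region, submit it on QMAN0 and then
 * run the kernel on every TPC engine.
 */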
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

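/* Assign the SOBs of the stream's current SOB group to the collective slave
 * queues: one SOB per NIC queue, plus a shared SOB for the DMA5/TPC7
 * reduction queues.
 */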
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	u64 base_addr;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob_group->base_sob_id * 4;
	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
	if (rc)
		dev_err(hdev->dev,
			"failed resetting sob group - sob base %u, count %u",
			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	prop->mstr_sob_mask[0] = 0;
	master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[0] |= BIT(i);

	prop->mstr_sob_mask[1] = 0;
	master_monitor_sobs =
			NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs; i++) {
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[1] |= BIT(i);
	}

	/* Set collective engine bit */
	prop->mstr_sob_mask[1] |= BIT(i);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

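/* Fill the collective master job's patched CB with two wait CBs: the first
 * monitor waits on the slave SOBs covered by mstr_sob_mask[0], the second on
 * the remaining slave SOBs covered by mstr_sob_mask[1].
 */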
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

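/* Initialize a collective wait CS: copy the signal SOB information from the
 * signal CS, build the master/slave wait CBs for every job in the CS and
 * advance the stream's SOB group value, handling wraparound.
 */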
static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);
	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

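/* Allocate the CPU-accessible DMA memory region, retrying until an
 * allocation whose start and end addresses share the same MSBs is found
 * (see the comment inside for why this is required).
 */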
1437static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1438{
1439 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1440 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1441 int i, j, rc = 0;
1442
1443 /*
1444 * The device CPU works with 40-bits addresses, while bit 39 must be set
1445 * to '1' when accessing the host.
1446 * Bits 49:39 of the full host address are saved for a later
1447 * configuration of the HW to perform extension to 50 bits.
1448 * Because there is a single HW register that holds the extension bits,
1449 * these bits must be identical in all allocated range.
1450 */
1451
1452 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1453 virt_addr_arr[i] =
1454 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1455 HL_CPU_ACCESSIBLE_MEM_SIZE,
1456 &dma_addr_arr[i],
1457 GFP_KERNEL | __GFP_ZERO);
1458 if (!virt_addr_arr[i]) {
1459 rc = -ENOMEM;
1460 goto free_dma_mem_arr;
1461 }
1462
1463 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1464 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1465 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1466 break;
1467 }
1468
1469 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1470 dev_err(hdev->dev,
1471 "MSB of CPU accessible DMA memory are not identical in all range\n");
1472 rc = -EFAULT;
1473 goto free_dma_mem_arr;
1474 }
1475
1476 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1477 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1478 hdev->cpu_pci_msb_addr =
1479 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1480
Ofir Bittonc692dec2020-10-04 17:34:37 +03001481 if (hdev->asic_prop.fw_security_disabled)
1482 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001483
1484free_dma_mem_arr:
1485 for (j = 0 ; j < i ; j++)
1486 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1487 HL_CPU_ACCESSIBLE_MEM_SIZE,
1488 virt_addr_arr[j],
1489 dma_addr_arr[j]);
1490
1491 return rc;
1492}
1493
1494static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1495{
1496 struct gaudi_device *gaudi = hdev->asic_specific;
1497 struct gaudi_internal_qman_info *q;
1498 u32 i;
1499
1500 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1501 q = &gaudi->internal_qmans[i];
1502 if (!q->pq_kernel_addr)
1503 continue;
1504 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1505 q->pq_kernel_addr,
1506 q->pq_dma_addr);
1507 }
1508}
1509
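/*
 * Allocate a coherent PQ buffer for every internal (on-device) queue.
 * The PQ size depends on the engine behind the queue: HBM DMA, MME, TPC and
 * NIC queues each use their own QMAN size constant.
 */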
1510static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1511{
1512 struct gaudi_device *gaudi = hdev->asic_specific;
1513 struct gaudi_internal_qman_info *q;
1514 int rc, i;
1515
1516 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1517 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1518 continue;
1519
1520 q = &gaudi->internal_qmans[i];
1521
1522 switch (i) {
Ofir Bitton0940cab2020-08-31 08:52:56 +03001523 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001524 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1525 break;
1526 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1527 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1528 break;
1529 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1530 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1531 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02001532 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1533 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1534 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001535 default:
1536 dev_err(hdev->dev, "Bad internal queue index %d", i);
1537 rc = -EINVAL;
1538 goto free_internal_qmans_pq_mem;
1539 }
1540
1541 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1542 hdev, q->pq_size,
1543 &q->pq_dma_addr,
1544 GFP_KERNEL | __GFP_ZERO);
1545 if (!q->pq_kernel_addr) {
1546 rc = -ENOMEM;
1547 goto free_internal_qmans_pq_mem;
1548 }
1549 }
1550
1551 return 0;
1552
1553free_internal_qmans_pq_mem:
1554 gaudi_free_internal_qmans_pq_mem(hdev);
1555 return rc;
1556}
1557
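/*
 * S/W init flow: build the event ID map from the IRQ mapping table, create
 * the DMA pool for small allocations, allocate the CPU accessible memory and
 * wrap it with a gen_pool allocator, allocate the internal QMAN PQs and
 * initialize the locks used by the queue and clock-gating code.
 */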
1558static int gaudi_sw_init(struct hl_device *hdev)
1559{
1560 struct gaudi_device *gaudi;
Ofir Bittonebd8d122020-05-10 13:41:28 +03001561 u32 i, event_id = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001562 int rc;
1563
1564 /* Allocate device structure */
1565 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1566 if (!gaudi)
1567 return -ENOMEM;
1568
Ofir Bittonebd8d122020-05-10 13:41:28 +03001569 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1570 if (gaudi_irq_map_table[i].valid) {
1571 if (event_id == GAUDI_EVENT_SIZE) {
1572 dev_err(hdev->dev,
1573 "Event array exceeds the limit of %u events\n",
1574 GAUDI_EVENT_SIZE);
1575 rc = -EINVAL;
1576 goto free_gaudi_device;
1577 }
1578
1579 gaudi->events[event_id++] =
1580 gaudi_irq_map_table[i].fc_id;
1581 }
1582 }
1583
Oded Gabbay2f553422020-08-15 16:28:10 +03001584 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001585
1586 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1587
1588 hdev->asic_specific = gaudi;
1589
1590 /* Create DMA pool for small allocations */
1591 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1592 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1593 if (!hdev->dma_pool) {
1594 dev_err(hdev->dev, "failed to create DMA pool\n");
1595 rc = -ENOMEM;
1596 goto free_gaudi_device;
1597 }
1598
1599 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1600 if (rc)
1601 goto free_dma_pool;
1602
1603 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1604 if (!hdev->cpu_accessible_dma_pool) {
1605 dev_err(hdev->dev,
1606 "Failed to create CPU accessible DMA pool\n");
1607 rc = -ENOMEM;
1608 goto free_cpu_dma_mem;
1609 }
1610
1611 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1612 (uintptr_t) hdev->cpu_accessible_dma_mem,
1613 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1614 if (rc) {
1615 dev_err(hdev->dev,
1616 "Failed to add memory to CPU accessible DMA pool\n");
1617 rc = -EFAULT;
1618 goto free_cpu_accessible_dma_pool;
1619 }
1620
1621 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1622 if (rc)
1623 goto free_cpu_accessible_dma_pool;
1624
1625 spin_lock_init(&gaudi->hw_queues_lock);
1626 mutex_init(&gaudi->clk_gate_mutex);
1627
1628 hdev->supports_sync_stream = true;
1629 hdev->supports_coresight = true;
1630
1631 return 0;
1632
1633free_cpu_accessible_dma_pool:
1634 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1635free_cpu_dma_mem:
Ofir Bittonc692dec2020-10-04 17:34:37 +03001636 if (hdev->asic_prop.fw_security_disabled)
1637 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1638 hdev->cpu_pci_msb_addr);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001639 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1640 HL_CPU_ACCESSIBLE_MEM_SIZE,
1641 hdev->cpu_accessible_dma_mem,
1642 hdev->cpu_accessible_dma_address);
1643free_dma_pool:
1644 dma_pool_destroy(hdev->dma_pool);
1645free_gaudi_device:
1646 kfree(gaudi);
1647 return rc;
1648}
1649
1650static int gaudi_sw_fini(struct hl_device *hdev)
1651{
1652 struct gaudi_device *gaudi = hdev->asic_specific;
1653
1654 gaudi_free_internal_qmans_pq_mem(hdev);
1655
1656 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1657
Ofir Bittonc692dec2020-10-04 17:34:37 +03001658 if (hdev->asic_prop.fw_security_disabled)
1659 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001660 hdev->cpu_pci_msb_addr);
Ofir Bittonc692dec2020-10-04 17:34:37 +03001661
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001662 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1663 HL_CPU_ACCESSIBLE_MEM_SIZE,
1664 hdev->cpu_accessible_dma_mem,
1665 hdev->cpu_accessible_dma_address);
1666
1667 dma_pool_destroy(hdev->dma_pool);
1668
1669 mutex_destroy(&gaudi->clk_gate_mutex);
1670
1671 kfree(gaudi);
1672
1673 return 0;
1674}
1675
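/*
 * Single MSI mode interrupt handler. All completion queues and the event
 * queue share one vector, so every CQ and the EQ are polled on each
 * interrupt.
 */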
1676static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1677{
1678 struct hl_device *hdev = arg;
1679 int i;
1680
1681 if (hdev->disabled)
1682 return IRQ_HANDLED;
1683
1684 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1685 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1686
1687 hl_irq_handler_eq(irq, &hdev->event_queue);
1688
1689 return IRQ_HANDLED;
1690}
1691
1692/*
1693 * For backward compatibility, new MSI interrupts should be set after the
1694 * existing CPU and NIC interrupts.
1695 */
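/*
 * Resulting vector mapping, as implemented below:
 *   nr < GAUDI_EVENT_QUEUE_MSI_IDX                     -> vector nr (CQs)
 *   cpu_eq (expected with nr == GAUDI_EVENT_QUEUE_MSI_IDX) -> vector nr (CPU EQ)
 *   otherwise                                          -> vector nr + NIC_NUMBER_OF_ENGINES + 1
 */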
1696static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1697 bool cpu_eq)
1698{
1699 int msi_vec;
1700
1701 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1702 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1703 GAUDI_EVENT_QUEUE_MSI_IDX);
1704
1705 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1706 (nr + NIC_NUMBER_OF_ENGINES + 1);
1707
1708 return pci_irq_vector(hdev->pdev, msi_vec);
1709}
1710
1711static int gaudi_enable_msi_single(struct hl_device *hdev)
1712{
1713 int rc, irq;
1714
Oded Gabbay3b82c342020-11-27 18:10:20 +02001715 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001716
1717 irq = gaudi_pci_irq_vector(hdev, 0, false);
1718 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1719 "gaudi single msi", hdev);
1720 if (rc)
1721 dev_err(hdev->dev,
1722 "Failed to request single MSI IRQ\n");
1723
1724 return rc;
1725}
1726
1727static int gaudi_enable_msi_multi(struct hl_device *hdev)
1728{
1729 int cq_cnt = hdev->asic_prop.completion_queues_count;
1730 int rc, i, irq_cnt_init, irq;
1731
1732 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1733 irq = gaudi_pci_irq_vector(hdev, i, false);
1734 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1735 &hdev->completion_queue[i]);
1736 if (rc) {
1737 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1738 goto free_irqs;
1739 }
1740 }
1741
1742 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1743 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1744 &hdev->event_queue);
1745 if (rc) {
1746 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1747 goto free_irqs;
1748 }
1749
1750 return 0;
1751
1752free_irqs:
1753 for (i = 0 ; i < irq_cnt_init ; i++)
1754 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1755 &hdev->completion_queue[i]);
1756 return rc;
1757}
1758
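/*
 * Enable MSI. Up to GAUDI_MSI_ENTRIES vectors are requested; if fewer than
 * NUMBER_OF_INTERRUPTS are granted, the driver falls back to single MSI mode
 * where one vector serves all completion queues and the event queue.
 */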
1759static int gaudi_enable_msi(struct hl_device *hdev)
1760{
1761 struct gaudi_device *gaudi = hdev->asic_specific;
1762 int rc;
1763
1764 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1765 return 0;
1766
1767 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1768 PCI_IRQ_MSI);
1769 if (rc < 0) {
1770 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1771 return rc;
1772 }
1773
1774 if (rc < NUMBER_OF_INTERRUPTS) {
1775 gaudi->multi_msi_mode = false;
1776 rc = gaudi_enable_msi_single(hdev);
1777 } else {
1778 gaudi->multi_msi_mode = true;
1779 rc = gaudi_enable_msi_multi(hdev);
1780 }
1781
1782 if (rc)
1783 goto free_pci_irq_vectors;
1784
1785 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1786
1787 return 0;
1788
1789free_pci_irq_vectors:
1790 pci_free_irq_vectors(hdev->pdev);
1791 return rc;
1792}
1793
1794static void gaudi_sync_irqs(struct hl_device *hdev)
1795{
1796 struct gaudi_device *gaudi = hdev->asic_specific;
1797 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1798
1799 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1800 return;
1801
1802 /* Wait for all pending IRQs to be finished */
1803 if (gaudi->multi_msi_mode) {
1804 for (i = 0 ; i < cq_cnt ; i++)
1805 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1806
1807 synchronize_irq(gaudi_pci_irq_vector(hdev,
1808 GAUDI_EVENT_QUEUE_MSI_IDX,
1809 true));
1810 } else {
1811 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1812 }
1813}
1814
1815static void gaudi_disable_msi(struct hl_device *hdev)
1816{
1817 struct gaudi_device *gaudi = hdev->asic_specific;
1818 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1819
1820 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1821 return;
1822
1823 gaudi_sync_irqs(hdev);
1824
1825 if (gaudi->multi_msi_mode) {
1826 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1827 true);
1828 free_irq(irq, &hdev->event_queue);
1829
1830 for (i = 0 ; i < cq_cnt ; i++) {
1831 irq = gaudi_pci_irq_vector(hdev, i, false);
1832 free_irq(irq, &hdev->completion_queue[i]);
1833 }
1834 } else {
1835 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1836 }
1837
1838 pci_free_irq_vectors(hdev->pdev);
1839
1840 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1841}
1842
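/*
 * Enable the SRAM scramblers in the NIF/SIF routers and DMA interfaces.
 * The driver does this only when it owns the security configuration
 * (fw_security_disabled) and the firmware has not already enabled SRAM
 * scrambling, as reported in the firmware security status.
 */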
1843static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1844{
1845 struct gaudi_device *gaudi = hdev->asic_specific;
1846
Ofir Bittonc692dec2020-10-04 17:34:37 +03001847 if (!hdev->asic_prop.fw_security_disabled)
1848 return;
1849
1850 if (hdev->asic_prop.fw_security_status_valid &&
1851 (hdev->asic_prop.fw_app_security_map &
1852 CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
1853 return;
1854
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001855 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1856 return;
1857
1858 if (!hdev->sram_scrambler_enable)
1859 return;
1860
1861 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1862 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1863 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1864 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1865 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1866 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1867 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1868 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1869 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1870 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1871 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1872 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1873 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1874 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1875 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1876 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1877
1878 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1879 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1880 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1881 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1882 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1883 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1884 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1885 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1886 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1887 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1888 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1889 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1890 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1891 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1892 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1893 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1894
1895 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1896 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1897 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1898 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1899 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1900 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1901 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1902 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1903 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1904 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1905 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1906 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1907 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1908 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1909 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1910 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1911
1912 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1913}
1914
1915static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1916{
1917 struct gaudi_device *gaudi = hdev->asic_specific;
1918
Ofir Bittonc692dec2020-10-04 17:34:37 +03001919 if (!hdev->asic_prop.fw_security_disabled)
1920 return;
1921
1922 if (hdev->asic_prop.fw_security_status_valid &&
1923 (hdev->asic_prop.fw_boot_cpu_security_map &
1924 CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
1925 return;
1926
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001927 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1928 return;
1929
1930 if (!hdev->dram_scrambler_enable)
1931 return;
1932
1933 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1934 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1935 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1936 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1937 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1938 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1939 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1940 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1941 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1942 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1943 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1944 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1945 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1946 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1947 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1948 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1949
1950 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1951 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1952 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1953 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1954 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1955 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1956 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1957 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1958 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1959 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1960 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1961 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1962 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1963 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1964 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1965 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1966
1967 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1968 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1969 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1970 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1971 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1972 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1973 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1974 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1975 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1976 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1977 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1978 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1979 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1980 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1981 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1982 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1983
1984 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1985}
1986
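/*
 * Configure the end-to-end (E2E) credits of the SIF/NIF routers and the DMA
 * interface down channels, then enable E2E for HBM and PCI traffic. Skipped
 * when the firmware owns the security configuration or has already set up
 * the E2E credits.
 */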
1987static void gaudi_init_e2e(struct hl_device *hdev)
1988{
Ofir Bittonc692dec2020-10-04 17:34:37 +03001989 if (!hdev->asic_prop.fw_security_disabled)
1990 return;
1991
1992 if (hdev->asic_prop.fw_security_status_valid &&
1993 (hdev->asic_prop.fw_boot_cpu_security_map &
1994 CPU_BOOT_DEV_STS0_E2E_CRED_EN))
1995 return;
1996
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001997 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1998 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1999 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2000 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2001
2002 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2003 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2004 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2005 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2006
2007 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2008 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2009 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2010 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2011
2012 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2013 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2014 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2015 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2016
2017 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2018 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2019 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2020 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2021
2022 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2023 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2024 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2025 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2026
2027 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2028 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2029 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2030 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2031
2032 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2033 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2034 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2035 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2036
2037 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2038 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2039 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2040 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2041
2042 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2043 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2044 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2045 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2046
2047 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2048 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2049 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2050 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2051
2052 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2053 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2054 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2055 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2056
2057 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2058 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2059 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2060 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2061
2062 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2063 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2064 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2065 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2066
2067 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2068 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2069 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2070 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2071
2072 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2073 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2074 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2075 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2076
2077 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2078 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2079 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2080 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2081
2082 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2083 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2084 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2085 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2086
2087 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2088 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2089 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2090 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2091
2092 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2093 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2094 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2095 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2096
2097 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2098 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2099 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2100 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2101
2102 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2103 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2104 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2105 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2106
2107 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2108 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2109 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2110 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2111
2112 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2113 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2114 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2115 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2116
2117 if (!hdev->dram_scrambler_enable) {
2118 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2119 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2120 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2121 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2122
2123 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2124 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2125 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2126 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2127
2128 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2129 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2130 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2131 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2132
2133 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2134 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2135 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2136 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2137
2138 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2139 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2140 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2141 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2142
2143 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2144 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2145 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2146 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2147
2148 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2149 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2150 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2151 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2152
2153 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2154 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2155 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2156 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2157
2158 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2159 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2160 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2161 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2162
2163 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2164 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2165 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2166 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2167
2168 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2169 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2170 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2171 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2172
2173 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2174 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2175 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2176 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2177
2178 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2179 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2180 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2181 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2182
2183 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2184 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2185 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2186 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2187
2188 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2189 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2190 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2191 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2192
2193 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2194 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2195 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2196 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2197
2198 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2199 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2200 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2201 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2202
2203 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2204 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2205 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2206 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2207
2208 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2209 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2210 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2211 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2212
2213 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2214 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2215 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2216 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2217
2218 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2219 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2220 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2221 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2222
2223 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2224 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2225 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2226 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2227
2228 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2229 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2230 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2231 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2232
2233 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2234 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2235 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2236 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2237 }
2238
2239 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2240 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2241 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2242 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2243
2244 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2245 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2246 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2247 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2248
2249 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2250 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2251 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2252 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2253
2254 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2255 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2256 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2257 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2258
2259 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2260 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2261 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2262 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2263
2264 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2265 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2266 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2267 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2268
2269 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2270 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2271 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2272 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2273
2274 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2275 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2276 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2277 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2278
2279 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2280 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2281 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2282 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2283
2284 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2285 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2286 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2287 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2288
2289 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2290 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2291 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2292 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2293
2294 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2295 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2296 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2297 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2298
2299 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2300 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2301 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2302 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2303
2304 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2305 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2306 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2307 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2308
2309 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2310 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2311 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2312 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2313
2314 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2315 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2316 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2317 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2318
2319 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2320 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2321 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2322 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2323
2324 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2325 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2326 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2327 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2328
2329 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2330 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2331 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2332 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2333
2334 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2335 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2336 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2337 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2338
2339 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2340 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2341 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2342 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2343
2344 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2345 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2346 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2347 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2348
2349 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2350 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2351 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2352 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2353
2354 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2355 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2356 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2357 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2358}
2359
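/*
 * Program the HBM read/write credit counts of the four DMA interfaces and
 * enable read/write credits on both HBM channels. Skipped when the firmware
 * owns the security configuration or has already configured the HBM credits.
 */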
2360static void gaudi_init_hbm_cred(struct hl_device *hdev)
2361{
2362 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2363
Ofir Bittonc692dec2020-10-04 17:34:37 +03002364 if (!hdev->asic_prop.fw_security_disabled)
2365 return;
2366
2367 if (hdev->asic_prop.fw_security_status_valid &&
2368 (hdev->asic_prop.fw_boot_cpu_security_map &
2369 CPU_BOOT_DEV_STS0_HBM_CRED_EN))
2370 return;
2371
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002372 hbm0_wr = 0x33333333;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002373 hbm0_rd = 0x77777777;
Oded Gabbay0b168c82020-06-15 19:25:57 +03002374 hbm1_wr = 0x55555555;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002375 hbm1_rd = 0xDDDDDDDD;
2376
2377 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2378 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2379 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2380 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2381
2382 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2383 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2384 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2385 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2386
2387 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2388 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2389 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2390 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2391
2392 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2393 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2394 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2395 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2396
2397 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2398 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2399 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2400 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2401 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2402 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2403 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2404 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2405 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2406 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2407 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2408 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2409
2410 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2411 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2412 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2413 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2414 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2415 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2416 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2417 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2418 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2419 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2420 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2421 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2422}
2423
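/*
 * "Golden" register configuration: E2E credits, HBM credits, TPC arithmetic
 * interrupt masking and i-cache fetch width, clearing the first 128 bytes of
 * SRAM for tensor DMA, and the MME EUS roll-up counters.
 */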
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002424static void gaudi_init_golden_registers(struct hl_device *hdev)
2425{
2426 u32 tpc_offset;
2427 int tpc_id, i;
2428
2429 gaudi_init_e2e(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002430 gaudi_init_hbm_cred(hdev);
2431
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002432 for (tpc_id = 0, tpc_offset = 0;
2433 tpc_id < TPC_NUMBER_OF_ENGINES;
2434 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2435 /* Mask all arithmetic interrupts from TPC */
2436 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2437 /* Set 16 cache lines */
2438 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2439 ICACHE_FETCH_LINE_NUM, 2);
2440 }
2441
2442 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2443 for (i = 0 ; i < 128 ; i += 8)
2444 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2445
2446 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2447 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2448 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2449 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002450}
2451
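/*
 * Configure a single stream of a PCI DMA QMAN: PQ base/size, CP LDMA offsets
 * and the sync manager monitor/SOB base addresses. The RAZWI error
 * reporting, arbiter watchdog and protection bits are configured only once
 * per QMAN, on stream 0.
 */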
2452static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2453 int qman_id, dma_addr_t qman_pq_addr)
2454{
2455 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2456 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2457 u32 q_off, dma_qm_offset;
2458 u32 dma_qm_err_cfg;
2459
2460 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2461
2462 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2463 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2464 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2465 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2466 so_base_en_lo = lower_32_bits(CFG_BASE +
2467 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2468 so_base_en_hi = upper_32_bits(CFG_BASE +
2469 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2470 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2471 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2472 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2473 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2474 so_base_ws_lo = lower_32_bits(CFG_BASE +
2475 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2476 so_base_ws_hi = upper_32_bits(CFG_BASE +
2477 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2478
2479 q_off = dma_qm_offset + qman_id * 4;
2480
2481 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2482 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2483
2484 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2485 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2486 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2487
Ofir Bitton25121d92020-09-24 08:22:58 +03002488 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2489 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2490 QMAN_LDMA_SRC_OFFSET);
2491 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2492 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002493
2494 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2495 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2496 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2497 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2498 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2499 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2500 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2501 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2502
Omer Shpigelmance043262020-06-16 17:56:27 +03002503 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2504
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002505 /* The following configuration is needed only once per QMAN */
2506 if (qman_id == 0) {
2507 /* Configure RAZWI IRQ */
2508 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2509 if (hdev->stop_on_err) {
2510 dma_qm_err_cfg |=
2511 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2512 }
2513
2514 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2515 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2516 lower_32_bits(CFG_BASE +
2517 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2518 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2519 upper_32_bits(CFG_BASE +
2520 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2521 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2522 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2523 dma_id);
2524
2525 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2526 QM_ARB_ERR_MSG_EN_MASK);
2527
2528 /* Increase ARB WDT to support streams architecture */
2529 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2530 GAUDI_ARB_WDT_TIMEOUT);
2531
2532 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2533 QMAN_EXTERNAL_MAKE_TRUSTED);
2534
2535 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2536 }
2537}
2538
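/*
 * Configure a DMA core: maximum outstanding reads and read size, the LBW
 * outstanding limit (WA for H/W bug H3-2116), RAZWI error reporting,
 * protection and MMU bypass for secured channels, and finally enable the
 * core.
 */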
2539static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2540{
2541 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2542 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2543
2544 /* Set to maximum possible according to physical size */
2545 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2546 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2547
Oded Gabbayd1f36332020-09-14 09:26:54 +03002548 /* WA for H/W bug H3-2116 */
2549 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2550
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002551	/* STOP_ON bit implies no completion of the operation in case of RAZWI */
2552 if (hdev->stop_on_err)
2553 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2554
2555 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2556 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2557 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2558 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2559 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2560 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2561 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2562 WREG32(mmDMA0_CORE_PROT + dma_offset,
2563 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2564 /* If the channel is secured, it should be in MMU bypass mode */
2565 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2566 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2567 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2568}
2569
2570static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2571 u32 enable_mask)
2572{
2573 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2574
2575 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2576}
2577
2578static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2579{
2580 struct gaudi_device *gaudi = hdev->asic_specific;
2581 struct hl_hw_queue *q;
2582 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2583
2584 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2585 return;
2586
2587 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2588 dma_id = gaudi_dma_assignment[i];
2589 /*
2590		 * For queues after the CPU Q, we need to add 1 to get the correct
2591		 * queue index. In addition, we need to add the number of CPU EQ and
2592		 * NIC IRQs in order to get the correct MSI vector.
2593 */
2594 if (dma_id > 1) {
2595 cpu_skip = 1;
2596 nic_skip = NIC_NUMBER_OF_ENGINES;
2597 } else {
2598 cpu_skip = 0;
2599 nic_skip = 0;
2600 }
2601
2602 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2603 q_idx = 4 * dma_id + j + cpu_skip;
2604 q = &hdev->kernel_queues[q_idx];
2605 q->cq_id = cq_id++;
2606 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2607 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2608 q->bus_address);
2609 }
2610
2611 gaudi_init_dma_core(hdev, dma_id);
2612
2613 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2614 }
2615
2616 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2617}
2618
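/*
 * Configure a single stream of an HBM DMA QMAN. Streams 0-3 get a PQ in host
 * memory and CP DMA offsets; stream 4 is the lower CP, which also carries the
 * RAZWI, arbiter and protection configuration for the whole QMAN. DMA5
 * additionally gets CP_MSG_BASE 2/3 for the sync stream collective.
 */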
2619static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2620 int qman_id, u64 qman_base_addr)
2621{
Ofir Bitton5de406c2020-09-10 10:56:26 +03002622 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2623 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002624 u32 q_off, dma_qm_offset;
2625 u32 dma_qm_err_cfg;
2626
2627 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2628
Ofir Bitton5de406c2020-09-10 10:56:26 +03002629 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2630 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2631 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002632 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002633 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002634 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002635 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002636 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002637 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2638 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2639 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2640 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2641 so_base_ws_lo = lower_32_bits(CFG_BASE +
2642 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2643 so_base_ws_hi = upper_32_bits(CFG_BASE +
2644 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002645
2646 q_off = dma_qm_offset + qman_id * 4;
2647
2648 if (qman_id < 4) {
2649 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2650 lower_32_bits(qman_base_addr));
2651 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2652 upper_32_bits(qman_base_addr));
2653
2654 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2655 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2656 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2657
Ofir Bitton25121d92020-09-24 08:22:58 +03002658 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2659 QMAN_CPDMA_SIZE_OFFSET);
2660 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2661 QMAN_CPDMA_SRC_OFFSET);
2662 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2663 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002664 } else {
Ofir Bitton25121d92020-09-24 08:22:58 +03002665 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2666 QMAN_LDMA_SIZE_OFFSET);
2667 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2668 QMAN_LDMA_SRC_OFFSET);
2669 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
Oded Gabbay5b94d6e2020-09-25 20:14:15 +03002670 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002671
2672 /* Configure RAZWI IRQ */
2673 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2674 if (hdev->stop_on_err) {
2675 dma_qm_err_cfg |=
2676 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2677 }
2678 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2679
2680 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2681 lower_32_bits(CFG_BASE +
2682 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2683 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2684 upper_32_bits(CFG_BASE +
2685 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2686 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2687 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2688 dma_id);
2689
2690 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2691 QM_ARB_ERR_MSG_EN_MASK);
2692
2693 /* Increase ARB WDT to support streams architecture */
2694 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2695 GAUDI_ARB_WDT_TIMEOUT);
2696
2697 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2698 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2699 QMAN_INTERNAL_MAKE_TRUSTED);
2700 }
2701
Ofir Bitton5de406c2020-09-10 10:56:26 +03002702 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2703 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2704 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2705 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2706
2707 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2708 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2709 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2710 mtr_base_ws_lo);
2711 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2712 mtr_base_ws_hi);
2713 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2714 so_base_ws_lo);
2715 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2716 so_base_ws_hi);
2717 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002718}
2719
2720static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2721{
2722 struct gaudi_device *gaudi = hdev->asic_specific;
2723 struct gaudi_internal_qman_info *q;
2724 u64 qman_base_addr;
2725 int i, j, dma_id, internal_q_index;
2726
2727 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2728 return;
2729
2730 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2731 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2732
2733 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2734 /*
2735 * Add the CPU queue in order to get the correct queue
2736			 * number, as all internal queues are placed after it
2737 */
2738 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2739
2740 q = &gaudi->internal_qmans[internal_q_index];
2741 qman_base_addr = (u64) q->pq_dma_addr;
2742 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2743 qman_base_addr);
2744 }
2745
2746 /* Initializing lower CP for HBM DMA QMAN */
2747 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2748
2749 gaudi_init_dma_core(hdev, dma_id);
2750
2751 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2752 }
2753
2754 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2755}
2756
2757static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2758 int qman_id, u64 qman_base_addr)
2759{
2760 u32 mtr_base_lo, mtr_base_hi;
2761 u32 so_base_lo, so_base_hi;
2762 u32 q_off, mme_id;
2763 u32 mme_qm_err_cfg;
2764
2765 mtr_base_lo = lower_32_bits(CFG_BASE +
2766 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2767 mtr_base_hi = upper_32_bits(CFG_BASE +
2768 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2769 so_base_lo = lower_32_bits(CFG_BASE +
2770 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2771 so_base_hi = upper_32_bits(CFG_BASE +
2772 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2773
2774 q_off = mme_offset + qman_id * 4;
2775
2776 if (qman_id < 4) {
2777 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2778 lower_32_bits(qman_base_addr));
2779 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2780 upper_32_bits(qman_base_addr));
2781
2782 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2783 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2784 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2785
Ofir Bitton25121d92020-09-24 08:22:58 +03002786 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2787 QMAN_CPDMA_SIZE_OFFSET);
2788 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2789 QMAN_CPDMA_SRC_OFFSET);
2790 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2791 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002792 } else {
Ofir Bitton25121d92020-09-24 08:22:58 +03002793 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2794 QMAN_LDMA_SIZE_OFFSET);
2795 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2796 QMAN_LDMA_SRC_OFFSET);
2797 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2798 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002799
2800 /* Configure RAZWI IRQ */
2801 mme_id = mme_offset /
2802 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2803
2804 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2805 if (hdev->stop_on_err) {
2806 mme_qm_err_cfg |=
2807 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2808 }
2809 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2810 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2811 lower_32_bits(CFG_BASE +
2812 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2813 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2814 upper_32_bits(CFG_BASE +
2815 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2816 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2817 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2818 mme_id);
2819
2820 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2821 QM_ARB_ERR_MSG_EN_MASK);
2822
2823 /* Increase ARB WDT to support streams architecture */
2824 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2825 GAUDI_ARB_WDT_TIMEOUT);
2826
2827 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2828 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2829 QMAN_INTERNAL_MAKE_TRUSTED);
2830 }
2831
2832 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2833 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2834 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2835 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2836}
2837
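/*
 * Initialize the MME QMANs. GAUDI_QUEUE_ID_MME_0_X maps to the N_W MME
 * (mmMME2_QM_BASE) and GAUDI_QUEUE_ID_MME_1_X to the S_W MME
 * (mmMME0_QM_BASE); stream 4 of each QMAN is the lower CP.
 */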
2838static void gaudi_init_mme_qmans(struct hl_device *hdev)
2839{
2840 struct gaudi_device *gaudi = hdev->asic_specific;
2841 struct gaudi_internal_qman_info *q;
2842 u64 qman_base_addr;
2843 u32 mme_offset;
2844 int i, internal_q_index;
2845
2846 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2847 return;
2848
2849 /*
2850 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2851 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2852 */
2853
2854 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2855
2856 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2857 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2858 q = &gaudi->internal_qmans[internal_q_index];
2859 qman_base_addr = (u64) q->pq_dma_addr;
2860 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2861 qman_base_addr);
2862 if (i == 3)
2863 mme_offset = 0;
2864 }
2865
2866 /* Initializing lower CP for MME QMANs */
2867 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2868 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2869 gaudi_init_mme_qman(hdev, 0, 4, 0);
2870
2871 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2872 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2873
2874 gaudi->hw_cap_initialized |= HW_CAP_MME;
2875}
2876
2877static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2878 int qman_id, u64 qman_base_addr)
2879{
Ofir Bitton5de406c2020-09-10 10:56:26 +03002880 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2881 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002882 u32 q_off, tpc_id;
2883 u32 tpc_qm_err_cfg;
2884
Ofir Bitton5de406c2020-09-10 10:56:26 +03002885 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2886 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2887 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002888 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002889 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002890 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002891 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002892 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002893 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2894 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2895 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2896 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2897 so_base_ws_lo = lower_32_bits(CFG_BASE +
2898 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2899 so_base_ws_hi = upper_32_bits(CFG_BASE +
2900 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002901
2902 q_off = tpc_offset + qman_id * 4;
2903
Ofir Bitton5de406c2020-09-10 10:56:26 +03002904 tpc_id = tpc_offset /
2905 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2906
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002907 if (qman_id < 4) {
2908 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2909 lower_32_bits(qman_base_addr));
2910 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2911 upper_32_bits(qman_base_addr));
2912
2913 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2914 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2915 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2916
Ofir Bitton25121d92020-09-24 08:22:58 +03002917 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2918 QMAN_CPDMA_SIZE_OFFSET);
2919 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2920 QMAN_CPDMA_SRC_OFFSET);
2921 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2922 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002923 } else {
Ofir Bitton25121d92020-09-24 08:22:58 +03002924 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2925 QMAN_LDMA_SIZE_OFFSET);
2926 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2927 QMAN_LDMA_SRC_OFFSET);
2928 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2929 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002930
2931 /* Configure RAZWI IRQ */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002932 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2933 if (hdev->stop_on_err) {
2934 tpc_qm_err_cfg |=
2935 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2936 }
2937
2938 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2939 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2940 lower_32_bits(CFG_BASE +
2941 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2942 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2943 upper_32_bits(CFG_BASE +
2944 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2945 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2946 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2947 tpc_id);
2948
2949 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2950 QM_ARB_ERR_MSG_EN_MASK);
2951
2952 /* Increase ARB WDT to support streams architecture */
2953 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2954 GAUDI_ARB_WDT_TIMEOUT);
2955
2956 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2957 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2958 QMAN_INTERNAL_MAKE_TRUSTED);
2959 }
2960
Ofir Bitton5de406c2020-09-10 10:56:26 +03002961 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2962 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2963 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2964 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2965
2966 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
2967 if (tpc_id == 6) {
2968 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2969 mtr_base_ws_lo);
2970 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2971 mtr_base_ws_hi);
2972 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2973 so_base_ws_lo);
2974 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2975 so_base_ws_hi);
2976 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002977}
2978
2979static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2980{
2981 struct gaudi_device *gaudi = hdev->asic_specific;
2982 struct gaudi_internal_qman_info *q;
2983 u64 qman_base_addr;
2984 u32 so_base_hi, tpc_offset = 0;
2985 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2986 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2987 int i, tpc_id, internal_q_index;
2988
2989 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2990 return;
2991
2992 so_base_hi = upper_32_bits(CFG_BASE +
2993 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2994
2995 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2996 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2997 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2998 tpc_id * QMAN_STREAMS + i;
2999 q = &gaudi->internal_qmans[internal_q_index];
3000 qman_base_addr = (u64) q->pq_dma_addr;
3001 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3002 qman_base_addr);
3003
3004 if (i == 3) {
3005 /* Initializing lower CP for TPC QMAN */
3006 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3007
3008 /* Enable the QMAN and TPC channel */
3009 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3010 QMAN_TPC_ENABLE);
3011 }
3012 }
3013
3014 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3015 so_base_hi);
3016
3017 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3018
Oded Gabbay65887292020-08-12 11:21:01 +03003019 gaudi->hw_cap_initialized |=
3020 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003021 }
3022}
3023
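/*
 * Configure a single NIC QMAN stream: PQ base and size, CP LDMA offsets and
 * the monitor/sync-object message base addresses. For stream 0 only, the
 * RAZWI error reporting and arbitration settings are configured as well.
 */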
Oded Gabbay3c681572020-11-02 21:10:39 +02003024static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3025 int qman_id, u64 qman_base_addr, int nic_id)
3026{
Ofir Bitton5de406c2020-09-10 10:56:26 +03003027 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3028 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbay3c681572020-11-02 21:10:39 +02003029 u32 q_off;
3030 u32 nic_qm_err_cfg;
3031
Ofir Bitton5de406c2020-09-10 10:56:26 +03003032 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3033 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3034 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003035 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003036 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003037 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003038 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003039 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003040 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3041 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3042 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3043 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3044 so_base_ws_lo = lower_32_bits(CFG_BASE +
3045 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3046 so_base_ws_hi = upper_32_bits(CFG_BASE +
3047 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbay3c681572020-11-02 21:10:39 +02003048
3049 q_off = nic_offset + qman_id * 4;
3050
3051 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3052 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3053
3054 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3055 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3056 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3057
Ofir Bitton5de406c2020-09-10 10:56:26 +03003058 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3059 QMAN_LDMA_SIZE_OFFSET);
3060 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3061 QMAN_LDMA_SRC_OFFSET);
3062 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3063 QMAN_LDMA_DST_OFFSET);
Oded Gabbay3c681572020-11-02 21:10:39 +02003064
Ofir Bitton5de406c2020-09-10 10:56:26 +03003065 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3066 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3067 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3068 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3069
3070 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3071 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3072 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3073 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3074 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
Oded Gabbay3c681572020-11-02 21:10:39 +02003075
3076 if (qman_id == 0) {
3077 /* Configure RAZWI IRQ */
3078 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3079 if (hdev->stop_on_err) {
3080 nic_qm_err_cfg |=
3081 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3082 }
3083
3084 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3085 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3086 lower_32_bits(CFG_BASE +
3087 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3088 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3089 upper_32_bits(CFG_BASE +
3090 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3091 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3092 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3093 nic_id);
3094
3095 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3096 QM_ARB_ERR_MSG_EN_MASK);
3097
3098 /* Increase ARB WDT to support streams architecture */
3099 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3100 GAUDI_ARB_WDT_TIMEOUT);
3101
3102 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3103 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3104 QMAN_INTERNAL_MAKE_TRUSTED);
3105 }
3106}
3107
3108static void gaudi_init_nic_qmans(struct hl_device *hdev)
3109{
3110 struct gaudi_device *gaudi = hdev->asic_specific;
3111 struct gaudi_internal_qman_info *q;
3112 u64 qman_base_addr;
3113 u32 nic_offset = 0;
3114 u32 nic_delta_between_qmans =
3115 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3116 u32 nic_delta_between_nics =
3117 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3118 int i, nic_id, internal_q_index;
3119
3120 if (!hdev->nic_ports_mask)
3121 return;
3122
3123 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3124 return;
3125
3126 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3127
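	/*
	 * Each NIC macro hosts two QMANs. Advance by the QMAN-to-QMAN delta for
	 * every engine and, after the odd engine of each pair, step back and
	 * add the NIC-to-NIC delta to reach the next macro.
	 */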
3128 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3129 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3130 nic_offset += nic_delta_between_qmans;
3131 if (nic_id & 1) {
3132 nic_offset -= (nic_delta_between_qmans * 2);
3133 nic_offset += nic_delta_between_nics;
3134 }
3135 continue;
3136 }
3137
3138 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3139 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3140 nic_id * QMAN_STREAMS + i;
3141 q = &gaudi->internal_qmans[internal_q_index];
3142 qman_base_addr = (u64) q->pq_dma_addr;
3143 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3144 qman_base_addr, nic_id);
3145 }
3146
3147 /* Enable the QMAN */
3148 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3149
3150 nic_offset += nic_delta_between_qmans;
3151 if (nic_id & 1) {
3152 nic_offset -= (nic_delta_between_qmans * 2);
3153 nic_offset += nic_delta_between_nics;
3154 }
3155
3156 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3157 }
3158}
3159
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003160static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3161{
3162 struct gaudi_device *gaudi = hdev->asic_specific;
3163
3164 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3165 return;
3166
3167 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3168 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3169 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3170}
3171
3172static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3173{
3174 struct gaudi_device *gaudi = hdev->asic_specific;
3175
3176 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3177 return;
3178
3179 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3180 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3181 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3182 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3183 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3184}
3185
3186static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3187{
3188 struct gaudi_device *gaudi = hdev->asic_specific;
3189
3190 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3191 return;
3192
3193 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3194 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3195}
3196
3197static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3198{
3199 struct gaudi_device *gaudi = hdev->asic_specific;
3200 u32 tpc_offset = 0;
3201 int tpc_id;
3202
3203 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3204 return;
3205
3206 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3207 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3208 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3209 }
3210}
3211
Oded Gabbay3c681572020-11-02 21:10:39 +02003212static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3213{
3214 struct gaudi_device *gaudi = hdev->asic_specific;
3215 u32 nic_mask, nic_offset = 0;
3216 u32 nic_delta_between_qmans =
3217 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3218 u32 nic_delta_between_nics =
3219 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3220 int nic_id;
3221
3222 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3223 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3224
3225 if (gaudi->hw_cap_initialized & nic_mask)
3226 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3227
3228 nic_offset += nic_delta_between_qmans;
3229 if (nic_id & 1) {
3230 nic_offset -= (nic_delta_between_qmans * 2);
3231 nic_offset += nic_delta_between_nics;
3232 }
3233 }
3234}
3235
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003236static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3237{
3238 struct gaudi_device *gaudi = hdev->asic_specific;
3239
3240 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3241 return;
3242
3243 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3244 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3245 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3246 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3247}
3248
3249static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3250{
3251 struct gaudi_device *gaudi = hdev->asic_specific;
3252
3253 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3254 return;
3255
3256 /* Stop CPs of HBM DMA QMANs */
3257
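	/*
	 * 0x1F covers the four upper CPs plus the lower CP of each QMAN,
	 * assuming one bit per CP in the CP_STOP field.
	 */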
3258 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3259 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3260 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3261 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3262 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3263}
3264
3265static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3266{
3267 struct gaudi_device *gaudi = hdev->asic_specific;
3268
3269 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3270 return;
3271
3272 /* Stop CPs of MME QMANs */
3273 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3274 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3275}
3276
3277static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3278{
3279 struct gaudi_device *gaudi = hdev->asic_specific;
3280
3281 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3282 return;
3283
3284 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3285 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3286 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3287 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3288 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3289 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3290 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3291 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3292}
3293
Oded Gabbay3c681572020-11-02 21:10:39 +02003294static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3295{
3296 struct gaudi_device *gaudi = hdev->asic_specific;
3297
3298 /* Stop upper CPs of QMANs */
3299
3300 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3301 WREG32(mmNIC0_QM0_GLBL_CFG1,
3302 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3303 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3304 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3305
3306 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3307 WREG32(mmNIC0_QM1_GLBL_CFG1,
3308 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3309 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3310 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3311
3312 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3313 WREG32(mmNIC1_QM0_GLBL_CFG1,
3314 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3315 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3316 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3317
3318 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3319 WREG32(mmNIC1_QM1_GLBL_CFG1,
3320 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3321 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3322 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3323
3324 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3325 WREG32(mmNIC2_QM0_GLBL_CFG1,
3326 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3327 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3328 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3329
3330 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3331 WREG32(mmNIC2_QM1_GLBL_CFG1,
3332 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3333 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3334 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3335
3336 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3337 WREG32(mmNIC3_QM0_GLBL_CFG1,
3338 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3339 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3340 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3341
3342 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3343 WREG32(mmNIC3_QM1_GLBL_CFG1,
3344 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3345 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3346 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3347
3348 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3349 WREG32(mmNIC4_QM0_GLBL_CFG1,
3350 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3351 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3352 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3353
3354 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3355 WREG32(mmNIC4_QM1_GLBL_CFG1,
3356 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3357 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3358 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3359}
3360
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003361static void gaudi_pci_dma_stall(struct hl_device *hdev)
3362{
3363 struct gaudi_device *gaudi = hdev->asic_specific;
3364
3365 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3366 return;
3367
3368 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3369 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3370 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3371}
3372
3373static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3374{
3375 struct gaudi_device *gaudi = hdev->asic_specific;
3376
3377 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3378 return;
3379
3380 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3381 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3382 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3383 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3384 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3385}
3386
3387static void gaudi_mme_stall(struct hl_device *hdev)
3388{
3389 struct gaudi_device *gaudi = hdev->asic_specific;
3390
3391 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3392 return;
3393
3394 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3395 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3396 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3397 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3398 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3399 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3400 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3401 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3402 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3403 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3404 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3405 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3406 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3407 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3408 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3409 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3410 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3411}
3412
3413static void gaudi_tpc_stall(struct hl_device *hdev)
3414{
3415 struct gaudi_device *gaudi = hdev->asic_specific;
3416
3417 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3418 return;
3419
3420 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3421 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3422 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3423 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3424 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3425 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3426 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3427 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3428}
3429
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003430static void gaudi_set_clock_gating(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003431{
3432 struct gaudi_device *gaudi = hdev->asic_specific;
3433 u32 qman_offset;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003434 bool enable;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003435 int i;
3436
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003437	/* If a debug session is in progress, don't enable clock gating,
3438	 * as it may interfere with the debug session
3439 */
3440 if (hdev->in_debug)
3441 return;
3442
Oded Gabbay0024c0942020-12-05 22:55:09 +02003443 if (!hdev->asic_prop.fw_security_disabled)
3444 return;
3445
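	/* Clock gating is enabled per engine, according to the engine's bit in
	 * hdev->clock_gating_mask
	 */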
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003446 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003447 enable = !!(hdev->clock_gating_mask &
3448 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003449
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003450 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003451 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3452 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003453 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003454 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003455 }
3456
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003457 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003458 enable = !!(hdev->clock_gating_mask &
3459 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003460
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003461 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003462 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3463 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003464 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003465 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003466 }
3467
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003468 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3469 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3470 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003471
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003472 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3473 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3474 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003475
3476 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003477 enable = !!(hdev->clock_gating_mask &
3478 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003479
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003480 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003481 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003482 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003483 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003484
3485 qman_offset += TPC_QMAN_OFFSET;
3486 }
3487
3488 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3489}
3490
3491static void gaudi_disable_clock_gating(struct hl_device *hdev)
3492{
3493 struct gaudi_device *gaudi = hdev->asic_specific;
3494 u32 qman_offset;
3495 int i;
3496
Oded Gabbay0024c0942020-12-05 22:55:09 +02003497 if (!hdev->asic_prop.fw_security_disabled)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003498 return;
3499
3500 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3501 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3502 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3503
3504 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3505 }
3506
3507 WREG32(mmMME0_QM_CGM_CFG, 0);
3508 WREG32(mmMME0_QM_CGM_CFG1, 0);
3509 WREG32(mmMME2_QM_CGM_CFG, 0);
3510 WREG32(mmMME2_QM_CGM_CFG1, 0);
3511
3512 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3513 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3514 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3515
3516 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3517 }
3518
3519 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3520}
3521
3522static void gaudi_enable_timestamp(struct hl_device *hdev)
3523{
3524 /* Disable the timestamp counter */
3525 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3526
3527 /* Zero the lower/upper parts of the 64-bit counter */
3528 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3529 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3530
3531 /* Enable the counter */
3532 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3533}
3534
3535static void gaudi_disable_timestamp(struct hl_device *hdev)
3536{
3537 /* Disable the timestamp counter */
3538 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3539}
3540
3541static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3542{
Oded Gabbayc83c4172020-07-05 15:48:34 +03003543 u32 wait_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003544
3545 dev_info(hdev->dev,
3546 "Halting compute engines and disabling interrupts\n");
3547
Oded Gabbayc83c4172020-07-05 15:48:34 +03003548 if (hdev->pldm)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003549 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003550 else
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003551 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003552
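	/* Halt order: first stop the QMAN CPs so no new work is fetched, then
	 * stall the engine cores, and only then disable the QMANs, the
	 * timestamp counter and MSI
	 */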
Oded Gabbay3c681572020-11-02 21:10:39 +02003553 gaudi_stop_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003554 gaudi_stop_mme_qmans(hdev);
3555 gaudi_stop_tpc_qmans(hdev);
3556 gaudi_stop_hbm_dma_qmans(hdev);
3557 gaudi_stop_pci_dma_qmans(hdev);
3558
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003559 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003560
3561 msleep(wait_timeout_ms);
3562
3563 gaudi_pci_dma_stall(hdev);
3564 gaudi_hbm_dma_stall(hdev);
3565 gaudi_tpc_stall(hdev);
3566 gaudi_mme_stall(hdev);
3567
3568 msleep(wait_timeout_ms);
3569
Oded Gabbay3c681572020-11-02 21:10:39 +02003570 gaudi_disable_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003571 gaudi_disable_mme_qmans(hdev);
3572 gaudi_disable_tpc_qmans(hdev);
3573 gaudi_disable_hbm_dma_qmans(hdev);
3574 gaudi_disable_pci_dma_qmans(hdev);
3575
3576 gaudi_disable_timestamp(hdev);
3577
Oded Gabbay12ae3132020-07-03 20:58:23 +03003578 gaudi_disable_msi(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003579}
3580
3581static int gaudi_mmu_init(struct hl_device *hdev)
3582{
3583 struct asic_fixed_properties *prop = &hdev->asic_prop;
3584 struct gaudi_device *gaudi = hdev->asic_specific;
3585 u64 hop0_addr;
3586 int rc, i;
3587
3588 if (!hdev->mmu_enable)
3589 return 0;
3590
3591 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3592 return 0;
3593
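	/* Program the hop-0 page table address of every ASID; the tables are
	 * laid out contiguously starting at mmu_pgt_addr
	 */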
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003594 for (i = 0 ; i < prop->max_asid ; i++) {
3595 hop0_addr = prop->mmu_pgt_addr +
3596 (i * prop->mmu_hop_table_size);
3597
3598 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3599 if (rc) {
3600 dev_err(hdev->dev,
3601 "failed to set hop0 addr for asid %d\n", i);
3602 goto err;
3603 }
3604 }
3605
3606	/* Init the MMU cache management page */
3607 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3608 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3609
Tomer Tayar644883e2020-07-19 11:00:03 +03003610 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003611
3612 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3613 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3614
3615 WREG32(mmSTLB_HOP_CONFIGURATION,
3616 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3617
Omer Shpigelmancfd41762020-06-03 13:03:35 +03003618 /*
3619 * The H/W expects the first PI after init to be 1. After wraparound
3620 * we'll write 0.
3621 */
3622 gaudi->mmu_cache_inv_pi = 1;
3623
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003624 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3625
3626 return 0;
3627
3628err:
3629 return rc;
3630}
3631
3632static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3633{
3634 void __iomem *dst;
3635
3636 /* HBM scrambler must be initialized before pushing F/W to HBM */
3637 gaudi_init_scrambler_hbm(hdev);
3638
3639 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3640
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003641 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003642}
3643
3644static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3645{
3646 void __iomem *dst;
3647
3648 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3649
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003650 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003651}
3652
farah kassabrieb10b892020-10-14 15:17:36 +03003653static int gaudi_read_device_fw_version(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003654 enum hl_fw_component fwc)
3655{
3656 const char *name;
3657 u32 ver_off;
3658 char *dest;
3659
3660 switch (fwc) {
3661 case FW_COMP_UBOOT:
3662 ver_off = RREG32(mmUBOOT_VER_OFFSET);
3663 dest = hdev->asic_prop.uboot_ver;
3664 name = "U-Boot";
3665 break;
3666 case FW_COMP_PREBOOT:
3667 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3668 dest = hdev->asic_prop.preboot_ver;
3669 name = "Preboot";
3670 break;
3671 default:
3672 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
farah kassabrieb10b892020-10-14 15:17:36 +03003673 return -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003674 }
3675
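	/* The version register holds an SRAM address; strip the SRAM base and
	 * copy the version string out through the SRAM BAR
	 */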
3676 ver_off &= ~((u32)SRAM_BASE_ADDR);
3677
3678 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3679 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3680 VERSION_MAX_LEN);
3681 } else {
3682 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3683 name, ver_off);
3684 strcpy(dest, "unavailable");
farah kassabrieb10b892020-10-14 15:17:36 +03003685 return -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003686 }
farah kassabrieb10b892020-10-14 15:17:36 +03003687
3688 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003689}
3690
3691static int gaudi_init_cpu(struct hl_device *hdev)
3692{
3693 struct gaudi_device *gaudi = hdev->asic_specific;
3694 int rc;
3695
3696 if (!hdev->cpu_enable)
3697 return 0;
3698
3699 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3700 return 0;
3701
3702 /*
3703	 * The device CPU works with 40-bit addresses.
3704 * This register sets the extension to 50 bits.
3705 */
Ofir Bittonc692dec2020-10-04 17:34:37 +03003706 if (hdev->asic_prop.fw_security_disabled)
3707 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003708
3709 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3710 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3711 mmCPU_CMD_STATUS_TO_HOST,
Ofir Bitton323b7262020-10-04 09:09:19 +03003712 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003713 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3714 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3715
3716 if (rc)
3717 return rc;
3718
3719 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3720
3721 return 0;
3722}
3723
3724static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3725{
3726 struct gaudi_device *gaudi = hdev->asic_specific;
3727 struct hl_eq *eq;
3728 u32 status;
3729 struct hl_hw_queue *cpu_pq =
3730 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3731 int err;
3732
3733 if (!hdev->cpu_queues_enable)
3734 return 0;
3735
3736 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3737 return 0;
3738
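	/* Publish the host addresses of the CPU PQ, event queue and the CPU
	 * accessible memory region, then signal the device CPU and wait for the
	 * READY_FOR_HOST handshake
	 */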
3739 eq = &hdev->event_queue;
3740
3741 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3742 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3743
3744 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3745 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3746
3747 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3748 lower_32_bits(hdev->cpu_accessible_dma_address));
3749 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3750 upper_32_bits(hdev->cpu_accessible_dma_address));
3751
3752 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3753 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3754 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3755
3756 /* Used for EQ CI */
3757 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3758
3759 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3760
3761 if (gaudi->multi_msi_mode)
3762 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3763 else
3764 WREG32(mmCPU_IF_QUEUE_INIT,
3765 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3766
3767 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3768
3769 err = hl_poll_timeout(
3770 hdev,
3771 mmCPU_IF_QUEUE_INIT,
3772 status,
3773 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3774 1000,
3775 cpu_timeout);
3776
3777 if (err) {
3778 dev_err(hdev->dev,
Oded Gabbay6138bbe2020-09-04 20:18:16 +03003779 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003780 return -EIO;
3781 }
3782
3783 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3784 return 0;
3785}
3786
3787static void gaudi_pre_hw_init(struct hl_device *hdev)
3788{
3789 /* Perform read from the device to make sure device is up */
Oded Gabbay377182a2020-12-09 19:50:46 +02003790 RREG32(mmHW_STATE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003791
Ofir Bittonc692dec2020-10-04 17:34:37 +03003792 if (hdev->asic_prop.fw_security_disabled) {
3793 /* Set the access through PCI bars (Linux driver only) as
3794 * secured
3795 */
3796 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3797 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3798 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
Oded Gabbay57799ce2020-09-13 15:51:28 +03003799
Ofir Bittonc692dec2020-10-04 17:34:37 +03003800 /* Perform read to flush the waiting writes to ensure
3801 * configuration was set in the device
3802 */
3803 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3804 }
Oded Gabbay57799ce2020-09-13 15:51:28 +03003805
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003806 /*
3807 * Let's mark in the H/W that we have reached this point. We check
3808 * this value in the reset_before_init function to understand whether
3809 * we need to reset the chip before doing H/W init. This register is
3810 * cleared by the H/W upon H/W reset
3811 */
3812 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003813}
3814
3815static int gaudi_hw_init(struct hl_device *hdev)
3816{
3817 int rc;
3818
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003819 gaudi_pre_hw_init(hdev);
3820
3821 gaudi_init_pci_dma_qmans(hdev);
3822
3823 gaudi_init_hbm_dma_qmans(hdev);
3824
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003825 rc = gaudi_init_cpu(hdev);
3826 if (rc) {
3827 dev_err(hdev->dev, "failed to initialize CPU\n");
3828 return rc;
3829 }
3830
Oded Gabbay0024c0942020-12-05 22:55:09 +02003831	/* In case the clock gating was enabled in preboot, we need to disable
3832	 * it here before touching the MME/TPC registers.
3833	 * There is no need to take the clock gating mutex because when this
3834	 * function runs, no other relevant code can run
3835 */
3836 hdev->asic_funcs->disable_clock_gating(hdev);
3837
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003838 /* SRAM scrambler must be initialized after CPU is running from HBM */
3839 gaudi_init_scrambler_sram(hdev);
3840
3841 /* This is here just in case we are working without CPU */
3842 gaudi_init_scrambler_hbm(hdev);
3843
3844 gaudi_init_golden_registers(hdev);
3845
3846 rc = gaudi_mmu_init(hdev);
3847 if (rc)
3848 return rc;
3849
Omer Shpigelman3a3a5bf12020-05-11 10:45:12 +03003850 gaudi_init_security(hdev);
3851
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003852 gaudi_init_mme_qmans(hdev);
3853
3854 gaudi_init_tpc_qmans(hdev);
3855
Oded Gabbay3c681572020-11-02 21:10:39 +02003856 gaudi_init_nic_qmans(hdev);
3857
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003858 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003859
3860 gaudi_enable_timestamp(hdev);
3861
Oded Gabbay3c681572020-11-02 21:10:39 +02003862 /* MSI must be enabled before CPU queues and NIC are initialized */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003863 rc = gaudi_enable_msi(hdev);
3864 if (rc)
3865 goto disable_queues;
3866
3867 /* must be called after MSI was enabled */
3868 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3869 if (rc) {
3870 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3871 rc);
3872 goto disable_msi;
3873 }
3874
3875 /* Perform read from the device to flush all configuration */
Oded Gabbay377182a2020-12-09 19:50:46 +02003876 RREG32(mmHW_STATE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003877
3878 return 0;
3879
3880disable_msi:
3881 gaudi_disable_msi(hdev);
3882disable_queues:
3883 gaudi_disable_mme_qmans(hdev);
3884 gaudi_disable_pci_dma_qmans(hdev);
3885
3886 return rc;
3887}
3888
3889static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3890{
3891 struct gaudi_device *gaudi = hdev->asic_specific;
Igor Grinbergb726a2f2020-10-29 14:06:54 +02003892 u32 status, reset_timeout_ms, cpu_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003893
Oded Gabbay12ae3132020-07-03 20:58:23 +03003894 if (!hard_reset) {
3895 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3896 return;
3897 }
3898
Oded Gabbayc83c4172020-07-05 15:48:34 +03003899 if (hdev->pldm) {
Oded Gabbay12ae3132020-07-03 20:58:23 +03003900 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003901 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3902 } else {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003903 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003904 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3905 }
3906
3907	/* Set device to handle FLR by H/W, as we will put the device CPU
3908	 * into halt mode
3909 */
Oded Gabbaya63c3fb2020-11-26 18:11:05 +02003910 if (hdev->asic_prop.fw_security_disabled &&
3911 !hdev->asic_prop.hard_reset_done_by_fw)
Ofir Bittonb90c8942020-11-08 12:59:04 +02003912 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
Oded Gabbayc83c4172020-07-05 15:48:34 +03003913 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3914
3915	/* The state of the device CPU is unknown, so make sure it is
3916	 * stopped by any means necessary
3917 */
Ofir Bitton9c9013c2020-12-01 10:39:54 +02003918 if (hdev->asic_prop.hard_reset_done_by_fw)
3919 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
3920 else
3921 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
Ofir Bittonb90c8942020-11-08 12:59:04 +02003922
Oded Gabbayc83c4172020-07-05 15:48:34 +03003923 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3924
Oded Gabbaya63c3fb2020-11-26 18:11:05 +02003925 if (hdev->asic_prop.fw_security_disabled &&
3926 !hdev->asic_prop.hard_reset_done_by_fw) {
3927
3928 /* Configure the reset registers. Must be done as early as
3929 * possible in case we fail during H/W initialization
3930 */
3931 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
3932 (CFG_RST_H_DMA_MASK |
3933 CFG_RST_H_MME_MASK |
3934 CFG_RST_H_SM_MASK |
3935 CFG_RST_H_TPC_7_MASK));
3936
3937 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
3938
3939 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
3940 (CFG_RST_H_HBM_MASK |
3941 CFG_RST_H_TPC_7_MASK |
3942 CFG_RST_H_NIC_MASK |
3943 CFG_RST_H_SM_MASK |
3944 CFG_RST_H_DMA_MASK |
3945 CFG_RST_H_MME_MASK |
3946 CFG_RST_H_CPU_MASK |
3947 CFG_RST_H_MMU_MASK));
3948
3949 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
3950 (CFG_RST_L_IF_MASK |
3951 CFG_RST_L_PSOC_MASK |
3952 CFG_RST_L_TPC_MASK));
3953
Ofir Bittonb90c8942020-11-08 12:59:04 +02003954 msleep(cpu_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003955
Ofir Bittonb90c8942020-11-08 12:59:04 +02003956 /* Tell ASIC not to re-initialize PCIe */
3957 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003958
Ofir Bittonb90c8942020-11-08 12:59:04 +02003959 /* Restart BTL/BLR upon hard-reset */
3960 if (hdev->asic_prop.fw_security_disabled)
3961 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003962
Ofir Bittonb90c8942020-11-08 12:59:04 +02003963 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
Oded Gabbay12ae3132020-07-03 20:58:23 +03003964 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
Ofir Bittonb90c8942020-11-08 12:59:04 +02003965
Oded Gabbay13d0ee12020-12-06 23:48:45 +02003966 dev_info(hdev->dev,
3967 "Issued HARD reset command, going to wait %dms\n",
3968 reset_timeout_ms);
3969 } else {
3970 dev_info(hdev->dev,
3971 "Firmware performs HARD reset, going to wait %dms\n",
3972 reset_timeout_ms);
3973 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003974
3975 /*
3976 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3977	 * itself is in reset. We need to wait until the reset is deasserted.
3978 */
3979 msleep(reset_timeout_ms);
3980
3981 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3982 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3983 dev_err(hdev->dev,
3984 "Timeout while waiting for device to reset 0x%x\n",
3985 status);
3986
farah kassabrieb10b892020-10-14 15:17:36 +03003987 if (gaudi) {
3988 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3989 HW_CAP_HBM | HW_CAP_PCI_DMA |
3990 HW_CAP_MME | HW_CAP_TPC_MASK |
3991 HW_CAP_HBM_DMA | HW_CAP_PLL |
3992 HW_CAP_NIC_MASK | HW_CAP_MMU |
3993 HW_CAP_SRAM_SCRAMBLER |
3994 HW_CAP_HBM_SCRAMBLER |
3995 HW_CAP_CLK_GATE);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003996
farah kassabrieb10b892020-10-14 15:17:36 +03003997 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3998 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003999}
4000
4001static int gaudi_suspend(struct hl_device *hdev)
4002{
4003 int rc;
4004
Oded Gabbay2f553422020-08-15 16:28:10 +03004005 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004006 if (rc)
4007 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4008
4009 return rc;
4010}
4011
4012static int gaudi_resume(struct hl_device *hdev)
4013{
4014 return gaudi_init_iatu(hdev);
4015}
4016
4017static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
Hillf Danton0db57532020-08-23 07:32:42 +08004018 void *cpu_addr, dma_addr_t dma_addr, size_t size)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004019{
4020 int rc;
4021
4022 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4023 VM_DONTCOPY | VM_NORESERVE;
4024
Oded Gabbaya9d4ef62021-01-11 13:49:38 +02004025 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4026 (dma_addr - HOST_PHYS_BASE), size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004027 if (rc)
Hillf Danton0db57532020-08-23 07:32:42 +08004028 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004029
4030 return rc;
4031}
4032
4033static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4034{
4035 struct gaudi_device *gaudi = hdev->asic_specific;
4036 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
4037 int dma_id;
4038 bool invalid_queue = false;
4039
4040 switch (hw_queue_id) {
4041 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4042 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4043 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4044 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4045 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4046 break;
4047
4048 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4049 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4050 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4051 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4052 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4053 break;
4054
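	/* From here on the stream index is recovered with
	 * (hw_queue_id - 1) & 0x3, presumably because the CPU PQ occupies one
	 * queue ID between the PCI DMA and HBM DMA queue ranges
	 */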
4055 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4056 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4057 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4058 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4059 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4060 break;
4061
4062 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4063 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4064 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4065 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4066 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4067 break;
4068
4069 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4070 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4071 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4072 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4073 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4074 break;
4075
4076 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004077 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4078 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4079 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4080 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4081 break;
4082
Ofir Bitton0940cab2020-08-31 08:52:56 +03004083 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004084 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4085 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4086 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4087 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4088 break;
4089
Ofir Bitton0940cab2020-08-31 08:52:56 +03004090 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4091 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4092 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4093 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4094 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4095 break;
4096
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004097 case GAUDI_QUEUE_ID_CPU_PQ:
4098 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4099 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4100 else
4101 invalid_queue = true;
4102 break;
4103
4104 case GAUDI_QUEUE_ID_MME_0_0:
4105 db_reg_offset = mmMME2_QM_PQ_PI_0;
4106 break;
4107
4108 case GAUDI_QUEUE_ID_MME_0_1:
4109 db_reg_offset = mmMME2_QM_PQ_PI_1;
4110 break;
4111
4112 case GAUDI_QUEUE_ID_MME_0_2:
4113 db_reg_offset = mmMME2_QM_PQ_PI_2;
4114 break;
4115
4116 case GAUDI_QUEUE_ID_MME_0_3:
4117 db_reg_offset = mmMME2_QM_PQ_PI_3;
4118 break;
4119
4120 case GAUDI_QUEUE_ID_MME_1_0:
4121 db_reg_offset = mmMME0_QM_PQ_PI_0;
4122 break;
4123
4124 case GAUDI_QUEUE_ID_MME_1_1:
4125 db_reg_offset = mmMME0_QM_PQ_PI_1;
4126 break;
4127
4128 case GAUDI_QUEUE_ID_MME_1_2:
4129 db_reg_offset = mmMME0_QM_PQ_PI_2;
4130 break;
4131
4132 case GAUDI_QUEUE_ID_MME_1_3:
4133 db_reg_offset = mmMME0_QM_PQ_PI_3;
4134 break;
4135
4136 case GAUDI_QUEUE_ID_TPC_0_0:
4137 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4138 break;
4139
4140 case GAUDI_QUEUE_ID_TPC_0_1:
4141 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4142 break;
4143
4144 case GAUDI_QUEUE_ID_TPC_0_2:
4145 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4146 break;
4147
4148 case GAUDI_QUEUE_ID_TPC_0_3:
4149 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4150 break;
4151
4152 case GAUDI_QUEUE_ID_TPC_1_0:
4153 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4154 break;
4155
4156 case GAUDI_QUEUE_ID_TPC_1_1:
4157 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4158 break;
4159
4160 case GAUDI_QUEUE_ID_TPC_1_2:
4161 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4162 break;
4163
4164 case GAUDI_QUEUE_ID_TPC_1_3:
4165 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4166 break;
4167
4168 case GAUDI_QUEUE_ID_TPC_2_0:
4169 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4170 break;
4171
4172 case GAUDI_QUEUE_ID_TPC_2_1:
4173 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4174 break;
4175
4176 case GAUDI_QUEUE_ID_TPC_2_2:
4177 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4178 break;
4179
4180 case GAUDI_QUEUE_ID_TPC_2_3:
4181 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4182 break;
4183
4184 case GAUDI_QUEUE_ID_TPC_3_0:
4185 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4186 break;
4187
4188 case GAUDI_QUEUE_ID_TPC_3_1:
4189 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4190 break;
4191
4192 case GAUDI_QUEUE_ID_TPC_3_2:
4193 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4194 break;
4195
4196 case GAUDI_QUEUE_ID_TPC_3_3:
4197 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4198 break;
4199
4200 case GAUDI_QUEUE_ID_TPC_4_0:
4201 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4202 break;
4203
4204 case GAUDI_QUEUE_ID_TPC_4_1:
4205 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4206 break;
4207
4208 case GAUDI_QUEUE_ID_TPC_4_2:
4209 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4210 break;
4211
4212 case GAUDI_QUEUE_ID_TPC_4_3:
4213 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4214 break;
4215
4216 case GAUDI_QUEUE_ID_TPC_5_0:
4217 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4218 break;
4219
4220 case GAUDI_QUEUE_ID_TPC_5_1:
4221 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4222 break;
4223
4224 case GAUDI_QUEUE_ID_TPC_5_2:
4225 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4226 break;
4227
4228 case GAUDI_QUEUE_ID_TPC_5_3:
4229 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4230 break;
4231
4232 case GAUDI_QUEUE_ID_TPC_6_0:
4233 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4234 break;
4235
4236 case GAUDI_QUEUE_ID_TPC_6_1:
4237 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4238 break;
4239
4240 case GAUDI_QUEUE_ID_TPC_6_2:
4241 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4242 break;
4243
4244 case GAUDI_QUEUE_ID_TPC_6_3:
4245 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4246 break;
4247
4248 case GAUDI_QUEUE_ID_TPC_7_0:
4249 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4250 break;
4251
4252 case GAUDI_QUEUE_ID_TPC_7_1:
4253 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4254 break;
4255
4256 case GAUDI_QUEUE_ID_TPC_7_2:
4257 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4258 break;
4259
4260 case GAUDI_QUEUE_ID_TPC_7_3:
4261 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4262 break;
4263
Oded Gabbay3c681572020-11-02 21:10:39 +02004264 case GAUDI_QUEUE_ID_NIC_0_0:
4265 db_reg_offset = mmNIC0_QM0_PQ_PI_0;
4266 break;
4267
4268 case GAUDI_QUEUE_ID_NIC_0_1:
4269 db_reg_offset = mmNIC0_QM0_PQ_PI_1;
4270 break;
4271
4272 case GAUDI_QUEUE_ID_NIC_0_2:
4273 db_reg_offset = mmNIC0_QM0_PQ_PI_2;
4274 break;
4275
4276 case GAUDI_QUEUE_ID_NIC_0_3:
4277 db_reg_offset = mmNIC0_QM0_PQ_PI_3;
4278 break;
4279
4280 case GAUDI_QUEUE_ID_NIC_1_0:
4281 db_reg_offset = mmNIC0_QM1_PQ_PI_0;
4282 break;
4283
4284 case GAUDI_QUEUE_ID_NIC_1_1:
4285 db_reg_offset = mmNIC0_QM1_PQ_PI_1;
4286 break;
4287
4288 case GAUDI_QUEUE_ID_NIC_1_2:
4289 db_reg_offset = mmNIC0_QM1_PQ_PI_2;
4290 break;
4291
4292 case GAUDI_QUEUE_ID_NIC_1_3:
4293 db_reg_offset = mmNIC0_QM1_PQ_PI_3;
4294 break;
4295
4296 case GAUDI_QUEUE_ID_NIC_2_0:
4297 db_reg_offset = mmNIC1_QM0_PQ_PI_0;
4298 break;
4299
4300 case GAUDI_QUEUE_ID_NIC_2_1:
4301 db_reg_offset = mmNIC1_QM0_PQ_PI_1;
4302 break;
4303
4304 case GAUDI_QUEUE_ID_NIC_2_2:
4305 db_reg_offset = mmNIC1_QM0_PQ_PI_2;
4306 break;
4307
4308 case GAUDI_QUEUE_ID_NIC_2_3:
4309 db_reg_offset = mmNIC1_QM0_PQ_PI_3;
4310 break;
4311
4312 case GAUDI_QUEUE_ID_NIC_3_0:
4313 db_reg_offset = mmNIC1_QM1_PQ_PI_0;
4314 break;
4315
4316 case GAUDI_QUEUE_ID_NIC_3_1:
4317 db_reg_offset = mmNIC1_QM1_PQ_PI_1;
4318 break;
4319
4320 case GAUDI_QUEUE_ID_NIC_3_2:
4321 db_reg_offset = mmNIC1_QM1_PQ_PI_2;
4322 break;
4323
4324 case GAUDI_QUEUE_ID_NIC_3_3:
4325 db_reg_offset = mmNIC1_QM1_PQ_PI_3;
4326 break;
4327
4328 case GAUDI_QUEUE_ID_NIC_4_0:
4329 db_reg_offset = mmNIC2_QM0_PQ_PI_0;
4330 break;
4331
4332 case GAUDI_QUEUE_ID_NIC_4_1:
4333 db_reg_offset = mmNIC2_QM0_PQ_PI_1;
4334 break;
4335
4336 case GAUDI_QUEUE_ID_NIC_4_2:
4337 db_reg_offset = mmNIC2_QM0_PQ_PI_2;
4338 break;
4339
4340 case GAUDI_QUEUE_ID_NIC_4_3:
4341 db_reg_offset = mmNIC2_QM0_PQ_PI_3;
4342 break;
4343
4344 case GAUDI_QUEUE_ID_NIC_5_0:
4345 db_reg_offset = mmNIC2_QM1_PQ_PI_0;
4346 break;
4347
4348 case GAUDI_QUEUE_ID_NIC_5_1:
4349 db_reg_offset = mmNIC2_QM1_PQ_PI_1;
4350 break;
4351
4352 case GAUDI_QUEUE_ID_NIC_5_2:
4353 db_reg_offset = mmNIC2_QM1_PQ_PI_2;
4354 break;
4355
4356 case GAUDI_QUEUE_ID_NIC_5_3:
4357 db_reg_offset = mmNIC2_QM1_PQ_PI_3;
4358 break;
4359
4360 case GAUDI_QUEUE_ID_NIC_6_0:
4361 db_reg_offset = mmNIC3_QM0_PQ_PI_0;
4362 break;
4363
4364 case GAUDI_QUEUE_ID_NIC_6_1:
4365 db_reg_offset = mmNIC3_QM0_PQ_PI_1;
4366 break;
4367
4368 case GAUDI_QUEUE_ID_NIC_6_2:
4369 db_reg_offset = mmNIC3_QM0_PQ_PI_2;
4370 break;
4371
4372 case GAUDI_QUEUE_ID_NIC_6_3:
4373 db_reg_offset = mmNIC3_QM0_PQ_PI_3;
4374 break;
4375
4376 case GAUDI_QUEUE_ID_NIC_7_0:
4377 db_reg_offset = mmNIC3_QM1_PQ_PI_0;
4378 break;
4379
4380 case GAUDI_QUEUE_ID_NIC_7_1:
4381 db_reg_offset = mmNIC3_QM1_PQ_PI_1;
4382 break;
4383
4384 case GAUDI_QUEUE_ID_NIC_7_2:
4385 db_reg_offset = mmNIC3_QM1_PQ_PI_2;
4386 break;
4387
4388 case GAUDI_QUEUE_ID_NIC_7_3:
4389 db_reg_offset = mmNIC3_QM1_PQ_PI_3;
4390 break;
4391
4392 case GAUDI_QUEUE_ID_NIC_8_0:
4393 db_reg_offset = mmNIC4_QM0_PQ_PI_0;
4394 break;
4395
4396 case GAUDI_QUEUE_ID_NIC_8_1:
4397 db_reg_offset = mmNIC4_QM0_PQ_PI_1;
4398 break;
4399
4400 case GAUDI_QUEUE_ID_NIC_8_2:
4401 db_reg_offset = mmNIC4_QM0_PQ_PI_2;
4402 break;
4403
4404 case GAUDI_QUEUE_ID_NIC_8_3:
4405 db_reg_offset = mmNIC4_QM0_PQ_PI_3;
4406 break;
4407
4408 case GAUDI_QUEUE_ID_NIC_9_0:
4409 db_reg_offset = mmNIC4_QM1_PQ_PI_0;
4410 break;
4411
4412 case GAUDI_QUEUE_ID_NIC_9_1:
4413 db_reg_offset = mmNIC4_QM1_PQ_PI_1;
4414 break;
4415
4416 case GAUDI_QUEUE_ID_NIC_9_2:
4417 db_reg_offset = mmNIC4_QM1_PQ_PI_2;
4418 break;
4419
4420 case GAUDI_QUEUE_ID_NIC_9_3:
4421 db_reg_offset = mmNIC4_QM1_PQ_PI_3;
4422 break;
4423
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004424 default:
4425 invalid_queue = true;
4426 }
4427
4428 if (invalid_queue) {
4429 /* Should never get here */
4430 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4431 hw_queue_id);
4432 return;
4433 }
4434
4435 db_value = pi;
4436
4437 /* ring the doorbell */
4438 WREG32(db_reg_offset, db_value);
4439
4440 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
4441 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
4442 GAUDI_EVENT_PI_UPDATE);
4443}
4444
4445static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4446 struct hl_bd *bd)
4447{
4448 __le64 *pbd = (__le64 *) bd;
4449
4450	/* The QMANs are in host memory, so a simple copy suffices */
4451 pqe[0] = pbd[0];
4452 pqe[1] = pbd[1];
4453}
4454
4455static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4456 dma_addr_t *dma_handle, gfp_t flags)
4457{
4458 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4459 dma_handle, flags);
4460
4461 /* Shift to the device's base physical address of host memory */
4462 if (kernel_addr)
4463 *dma_handle += HOST_PHYS_BASE;
4464
4465 return kernel_addr;
4466}
4467
4468static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4469 void *cpu_addr, dma_addr_t dma_handle)
4470{
4471	/* Subtract the device's base physical address of host memory */
4472 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4473
4474 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4475}
4476
farah kassabri03df1362020-05-06 11:17:38 +03004477static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4478{
4479 struct asic_fixed_properties *prop = &hdev->asic_prop;
4480 u64 cur_addr = DRAM_BASE_ADDR_USER;
4481 u32 val;
4482 u32 chunk_size;
4483 int rc, dma_id;
4484
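	/* Split the HBM range into chunks of up to 2GB, hand one chunk to each
	 * DMA core as a memset (COMMIT with MEM_SET), then poll all cores for
	 * completion before moving to the next stripe
	 */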
4485 while (cur_addr < prop->dram_end_address) {
4486 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4487 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4488
4489 chunk_size =
4490 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4491
4492 dev_dbg(hdev->dev,
4493 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4494 cur_addr, cur_addr + chunk_size);
4495
4496 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4497 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4498 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4499 lower_32_bits(cur_addr));
4500 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4501 upper_32_bits(cur_addr));
4502 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4503 chunk_size);
4504 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4505 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4506 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4507
4508 cur_addr += chunk_size;
4509
4510 if (cur_addr == prop->dram_end_address)
4511 break;
4512 }
4513
4514 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4515 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4516
4517 rc = hl_poll_timeout(
4518 hdev,
4519 mmDMA0_CORE_STS0 + dma_offset,
4520 val,
4521 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4522 1000,
4523 HBM_SCRUBBING_TIMEOUT_US);
4524
4525 if (rc) {
4526 dev_err(hdev->dev,
4527 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4528 dma_id);
4529 return -EIO;
4530 }
4531 }
4532 }
4533
4534 return 0;
4535}
4536
4537static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4538{
4539 struct asic_fixed_properties *prop = &hdev->asic_prop;
4540 struct gaudi_device *gaudi = hdev->asic_specific;
4541 u64 idle_mask = 0;
4542 int rc = 0;
4543 u64 val = 0;
4544
4545 if (!hdev->memory_scrub)
4546 return 0;
4547
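	/* addr == 0 and size == 0 means scrub everything: wait for the device
	 * to become idle, then clear SRAM and finally all of HBM
	 */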
4548 if (!addr && !size) {
4549 /* Wait till device is idle */
4550 rc = hl_poll_timeout(
4551 hdev,
4552 mmDMA0_CORE_STS0/* dummy */,
4553 val/* dummy */,
4554 (hdev->asic_funcs->is_device_idle(hdev,
4555 &idle_mask, NULL)),
4556 1000,
4557 HBM_SCRUBBING_TIMEOUT_US);
4558 if (rc) {
4559 dev_err(hdev->dev, "waiting for idle timeout\n");
4560 return -EIO;
4561 }
4562
4563 /* Scrub SRAM */
4564 addr = prop->sram_user_base_address;
4565 size = hdev->pldm ? 0x10000 :
4566 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4567 val = 0x7777777777777777ull;
4568
4569 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4570 if (rc) {
4571 dev_err(hdev->dev,
4572 "Failed to clear SRAM in mem scrub all\n");
4573 return rc;
4574 }
4575
4576 mutex_lock(&gaudi->clk_gate_mutex);
4577 hdev->asic_funcs->disable_clock_gating(hdev);
4578
4579 /* Scrub HBM using all DMA channels in parallel */
4580 rc = gaudi_hbm_scrubbing(hdev);
4581 if (rc)
4582 dev_err(hdev->dev,
4583 "Failed to clear HBM in mem scrub all\n");
4584
4585 hdev->asic_funcs->set_clock_gating(hdev);
4586 mutex_unlock(&gaudi->clk_gate_mutex);
4587 }
4588
4589 return rc;
4590}
4591
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004592static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4593 u32 queue_id, dma_addr_t *dma_handle,
4594 u16 *queue_len)
4595{
4596 struct gaudi_device *gaudi = hdev->asic_specific;
4597 struct gaudi_internal_qman_info *q;
4598
4599 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4600 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4601 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4602 return NULL;
4603 }
4604
4605 q = &gaudi->internal_qmans[queue_id];
4606 *dma_handle = q->pq_dma_addr;
4607 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4608
4609 return q->pq_kernel_addr;
4610}
4611
4612static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
Alon Mizrahi439bc472020-11-10 13:49:10 +02004613 u16 len, u32 timeout, u64 *result)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004614{
4615 struct gaudi_device *gaudi = hdev->asic_specific;
4616
4617 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4618 if (result)
4619 *result = 0;
4620 return 0;
4621 }
4622
Oded Gabbay788cacf2020-07-07 17:30:13 +03004623 if (!timeout)
4624 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4625
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004626 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4627 timeout, result);
4628}
4629
4630static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4631{
4632 struct packet_msg_prot *fence_pkt;
4633 dma_addr_t pkt_dma_addr;
4634 u32 fence_val, tmp, timeout_usec;
4635 dma_addr_t fence_dma_addr;
4636 u32 *fence_ptr;
4637 int rc;
4638
4639 if (hdev->pldm)
4640 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4641 else
4642 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4643
4644 fence_val = GAUDI_QMAN0_FENCE_VAL;
4645
4646 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4647 &fence_dma_addr);
4648 if (!fence_ptr) {
4649 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004650 "Failed to allocate memory for H/W queue %d testing\n",
4651 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004652 return -ENOMEM;
4653 }
4654
4655 *fence_ptr = 0;
4656
4657 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4658 sizeof(struct packet_msg_prot),
4659 GFP_KERNEL, &pkt_dma_addr);
4660 if (!fence_pkt) {
4661 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004662 "Failed to allocate packet for H/W queue %d testing\n",
4663 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004664 rc = -ENOMEM;
4665 goto free_fence_ptr;
4666 }
4667
Oded Gabbay65887292020-08-12 11:21:01 +03004668 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4669 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4670 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4671
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004672 fence_pkt->ctl = cpu_to_le32(tmp);
4673 fence_pkt->value = cpu_to_le32(fence_val);
4674 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4675
4676 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4677 sizeof(struct packet_msg_prot),
4678 pkt_dma_addr);
4679 if (rc) {
4680 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004681 "Failed to send fence packet to H/W queue %d\n",
4682 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004683 goto free_pkt;
4684 }
4685
4686 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4687 1000, timeout_usec, true);
4688
4689 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4690
4691 if (rc == -ETIMEDOUT) {
4692 dev_err(hdev->dev,
4693 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4694 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4695 rc = -EIO;
4696 }
4697
4698free_pkt:
4699 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4700 pkt_dma_addr);
4701free_fence_ptr:
4702 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4703 fence_dma_addr);
4704 return rc;
4705}
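
The queue self-test above is a fence handshake: the driver clears a 4-byte host buffer, submits a MSG_PROT packet whose payload is a magic value and whose address points at that buffer, then polls the buffer until the engine writes the value back or the timeout expires. Below is a minimal user-space sketch of that polling step only; the helper name, sampling interval and CLOCK_MONOTONIC bookkeeping are assumptions for illustration, not driver code.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>

/* Illustrative analogue of the poll-with-timeout used by the queue test;
 * not part of the driver.
 */
static bool poll_u32_timeout(volatile uint32_t *ptr, uint32_t expected,
			     unsigned int interval_us, unsigned int timeout_us)
{
	struct timespec start, now;
	int64_t elapsed_us;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		if (*ptr == expected)
			return true;	/* device wrote the fence value */

		clock_gettime(CLOCK_MONOTONIC, &now);
		elapsed_us = (int64_t)(now.tv_sec - start.tv_sec) * 1000000 +
			     (now.tv_nsec - start.tv_nsec) / 1000;
		if (elapsed_us >= (int64_t)timeout_us)
			return false;	/* timed out */

		usleep(interval_us);
	}
}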
4706
4707static int gaudi_test_cpu_queue(struct hl_device *hdev)
4708{
4709 struct gaudi_device *gaudi = hdev->asic_specific;
4710
4711 /*
4712 * check capability here as send_cpu_message() won't update the result
4713	 * value if the CPU queue capability is not set
4714 */
4715 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4716 return 0;
4717
4718 return hl_fw_test_cpu_queue(hdev);
4719}
4720
4721static int gaudi_test_queues(struct hl_device *hdev)
4722{
4723 int i, rc, ret_val = 0;
4724
Ofir Bitton3abc99b2020-06-23 14:50:39 +03004725 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004726 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4727 rc = gaudi_test_queue(hdev, i);
4728 if (rc)
4729 ret_val = -EINVAL;
4730 }
4731 }
4732
4733 rc = gaudi_test_cpu_queue(hdev);
4734 if (rc)
4735 ret_val = -EINVAL;
4736
4737 return ret_val;
4738}
4739
4740static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4741 gfp_t mem_flags, dma_addr_t *dma_handle)
4742{
4743 void *kernel_addr;
4744
4745 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4746 return NULL;
4747
4748 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4749
4750 /* Shift to the device's base physical address of host memory */
4751 if (kernel_addr)
4752 *dma_handle += HOST_PHYS_BASE;
4753
4754 return kernel_addr;
4755}
4756
4757static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4758 dma_addr_t dma_addr)
4759{
4760 /* Cancel the device's base physical address of host memory */
4761 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4762
4763 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4764}
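
The pool helpers above (and the SG map/unmap helpers that follow) rely on one convention: a host buffer has two addresses, the DMA address handed out by the DMA API and the address the device must actually use, which is that DMA address shifted by a fixed host base. A hedged sketch of the translation, using a made-up placeholder instead of the real HOST_PHYS_BASE:

#include <stdint.h>

/* Placeholder constant for the example only; the real HOST_PHYS_BASE comes
 * from the ASIC headers.
 */
#define EXAMPLE_HOST_BASE 0x8000000000000000ull

/* address the device should be given */
static inline uint64_t host_dma_to_device(uint64_t dma_addr)
{
	return dma_addr + EXAMPLE_HOST_BASE;
}

/* address the DMA API originally returned (needed when freeing/unmapping) */
static inline uint64_t device_to_host_dma(uint64_t device_addr)
{
	return device_addr - EXAMPLE_HOST_BASE;
}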
4765
4766static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4767 size_t size, dma_addr_t *dma_handle)
4768{
4769 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4770}
4771
4772static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4773 size_t size, void *vaddr)
4774{
4775 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4776}
4777
4778static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4779 int nents, enum dma_data_direction dir)
4780{
4781 struct scatterlist *sg;
4782 int i;
4783
4784 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4785 return -ENOMEM;
4786
4787 /* Shift to the device's base physical address of host memory */
4788 for_each_sg(sgl, sg, nents, i)
4789 sg->dma_address += HOST_PHYS_BASE;
4790
4791 return 0;
4792}
4793
4794static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4795 int nents, enum dma_data_direction dir)
4796{
4797 struct scatterlist *sg;
4798 int i;
4799
4800 /* Cancel the device's base physical address of host memory */
4801 for_each_sg(sgl, sg, nents, i)
4802 sg->dma_address -= HOST_PHYS_BASE;
4803
4804 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4805}
4806
4807static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4808 struct sg_table *sgt)
4809{
4810 struct scatterlist *sg, *sg_next_iter;
4811 u32 count, dma_desc_cnt;
4812 u64 len, len_next;
4813 dma_addr_t addr, addr_next;
4814
4815 dma_desc_cnt = 0;
4816
4817 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4818
4819 len = sg_dma_len(sg);
4820 addr = sg_dma_address(sg);
4821
4822 if (len == 0)
4823 break;
4824
4825 while ((count + 1) < sgt->nents) {
4826 sg_next_iter = sg_next(sg);
4827 len_next = sg_dma_len(sg_next_iter);
4828 addr_next = sg_dma_address(sg_next_iter);
4829
4830 if (len_next == 0)
4831 break;
4832
4833 if ((addr + len == addr_next) &&
4834 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4835 len += len_next;
4836 count++;
4837 sg = sg_next_iter;
4838 } else {
4839 break;
4840 }
4841 }
4842
4843 dma_desc_cnt++;
4844 }
4845
4846 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4847}
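
gaudi_get_dma_desc_list_size() sizes the patched CB by counting how many LIN_DMA descriptors the scatter-gather list collapses into: consecutive entries are merged while they stay physically contiguous and the combined length fits within DMA_MAX_TRANSFER_SIZE, and each merged run costs one descriptor. A self-contained sketch of that counting rule, using an invented chunk array instead of a real sg_table:

#include <stddef.h>
#include <stdint.h>

/* Simplified version of the descriptor-count computation above; the chunk
 * list and the descriptor size are assumptions for the example.
 */
struct chunk {
	uint64_t addr;
	uint64_t len;
};

static size_t count_lin_dma_descs(const struct chunk *c, size_t n,
				  uint64_t max_xfer)
{
	size_t i = 0, descs = 0;

	while (i < n) {
		uint64_t addr = c[i].addr, len = c[i].len;

		/* merge following chunks that are contiguous and still fit */
		while (i + 1 < n &&
		       addr + len == c[i + 1].addr &&
		       len + c[i + 1].len <= max_xfer) {
			len += c[i + 1].len;
			i++;
		}
		descs++;	/* one LIN_DMA descriptor per merged run */
		i++;
	}
	return descs;
}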
4848
4849static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4850 struct hl_cs_parser *parser,
4851 struct packet_lin_dma *user_dma_pkt,
4852 u64 addr, enum dma_data_direction dir)
4853{
4854 struct hl_userptr *userptr;
4855 int rc;
4856
4857 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4858 parser->job_userptr_list, &userptr))
4859 goto already_pinned;
4860
4861 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
4862 if (!userptr)
4863 return -ENOMEM;
4864
4865 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4866 userptr);
4867 if (rc)
4868 goto free_userptr;
4869
4870 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4871
4872 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
4873 userptr->sgt->nents, dir);
4874 if (rc) {
4875 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4876 goto unpin_memory;
4877 }
4878
4879 userptr->dma_mapped = true;
4880 userptr->dir = dir;
4881
4882already_pinned:
4883 parser->patched_cb_size +=
4884 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4885
4886 return 0;
4887
4888unpin_memory:
4889 hl_unpin_host_memory(hdev, userptr);
4890free_userptr:
4891 kfree(userptr);
4892 return rc;
4893}
4894
4895static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4896 struct hl_cs_parser *parser,
4897 struct packet_lin_dma *user_dma_pkt,
4898 bool src_in_host)
4899{
4900 enum dma_data_direction dir;
4901 bool skip_host_mem_pin = false, user_memset;
4902 u64 addr;
4903 int rc = 0;
4904
4905 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4906 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4907 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4908
4909 if (src_in_host) {
4910 if (user_memset)
4911 skip_host_mem_pin = true;
4912
4913 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4914 dir = DMA_TO_DEVICE;
4915 addr = le64_to_cpu(user_dma_pkt->src_addr);
4916 } else {
4917 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4918 dir = DMA_FROM_DEVICE;
4919 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4920 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4921 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4922 }
4923
4924 if (skip_host_mem_pin)
4925 parser->patched_cb_size += sizeof(*user_dma_pkt);
4926 else
4927 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4928 addr, dir);
4929
4930 return rc;
4931}
4932
4933static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4934 struct hl_cs_parser *parser,
4935 struct packet_lin_dma *user_dma_pkt)
4936{
4937 bool src_in_host = false;
4938 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4939 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4940 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4941
4942 dev_dbg(hdev->dev, "DMA packet details:\n");
4943 dev_dbg(hdev->dev, "source == 0x%llx\n",
4944 le64_to_cpu(user_dma_pkt->src_addr));
4945 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4946 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4947
4948 /*
4949 * Special handling for DMA with size 0. Bypass all validations
4950 * because no transactions will be done except for WR_COMP, which
4951 * is not a security issue
4952 */
4953 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4954 parser->patched_cb_size += sizeof(*user_dma_pkt);
4955 return 0;
4956 }
4957
4958 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4959 src_in_host = true;
4960
4961 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4962 src_in_host);
4963}
4964
Oded Gabbay64536ab2020-05-27 12:38:16 +03004965static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
4966 struct hl_cs_parser *parser,
4967 struct packet_load_and_exe *user_pkt)
4968{
4969 u32 cfg;
4970
4971 cfg = le32_to_cpu(user_pkt->cfg);
4972
4973 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
4974 dev_err(hdev->dev,
4975 "User not allowed to use Load and Execute\n");
4976 return -EPERM;
4977 }
4978
4979 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
4980
4981 return 0;
4982}
4983
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004984static int gaudi_validate_cb(struct hl_device *hdev,
4985 struct hl_cs_parser *parser, bool is_mmu)
4986{
4987 u32 cb_parsed_length = 0;
4988 int rc = 0;
4989
4990 parser->patched_cb_size = 0;
4991
4992 	/* user_cb_size is more than 0 so the loop will always be executed */
4993 while (cb_parsed_length < parser->user_cb_size) {
4994 enum packet_id pkt_id;
4995 u16 pkt_size;
4996 struct gaudi_packet *user_pkt;
4997
Arnd Bergmann82948e62020-10-26 17:08:06 +01004998 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004999
5000 pkt_id = (enum packet_id) (
5001 (le64_to_cpu(user_pkt->header) &
5002 PACKET_HEADER_PACKET_ID_MASK) >>
5003 PACKET_HEADER_PACKET_ID_SHIFT);
5004
Ofir Bittonbc75be22020-07-30 14:56:38 +03005005 if (!validate_packet_id(pkt_id)) {
5006 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5007 rc = -EINVAL;
5008 break;
5009 }
5010
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005011 pkt_size = gaudi_packet_sizes[pkt_id];
5012 cb_parsed_length += pkt_size;
5013 if (cb_parsed_length > parser->user_cb_size) {
5014 dev_err(hdev->dev,
5015 "packet 0x%x is out of CB boundary\n", pkt_id);
5016 rc = -EINVAL;
5017 break;
5018 }
5019
5020 switch (pkt_id) {
5021 case PACKET_MSG_PROT:
5022 dev_err(hdev->dev,
5023 "User not allowed to use MSG_PROT\n");
5024 rc = -EPERM;
5025 break;
5026
5027 case PACKET_CP_DMA:
5028 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5029 rc = -EPERM;
5030 break;
5031
5032 case PACKET_STOP:
5033 dev_err(hdev->dev, "User not allowed to use STOP\n");
5034 rc = -EPERM;
5035 break;
5036
Oded Gabbay2edc66e2020-07-03 19:28:54 +03005037 case PACKET_WREG_BULK:
5038 dev_err(hdev->dev,
5039 "User not allowed to use WREG_BULK\n");
5040 rc = -EPERM;
5041 break;
5042
Oded Gabbay64536ab2020-05-27 12:38:16 +03005043 case PACKET_LOAD_AND_EXE:
5044 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5045 (struct packet_load_and_exe *) user_pkt);
5046 break;
5047
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005048 case PACKET_LIN_DMA:
5049 parser->contains_dma_pkt = true;
5050 if (is_mmu)
5051 parser->patched_cb_size += pkt_size;
5052 else
5053 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5054 (struct packet_lin_dma *) user_pkt);
5055 break;
5056
5057 case PACKET_WREG_32:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005058 case PACKET_MSG_LONG:
5059 case PACKET_MSG_SHORT:
5060 case PACKET_REPEAT:
5061 case PACKET_FENCE:
5062 case PACKET_NOP:
5063 case PACKET_ARB_POINT:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005064 parser->patched_cb_size += pkt_size;
5065 break;
5066
5067 default:
5068 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5069 pkt_id);
5070 rc = -EINVAL;
5071 break;
5072 }
5073
5074 if (rc)
5075 break;
5076 }
5077
5078 /*
5079 * The new CB should have space at the end for two MSG_PROT packets:
5080 * 1. A packet that will act as a completion packet
5081 * 2. A packet that will generate MSI-X interrupt
5082 */
Ofir Bittonac6fdbf2020-12-03 16:59:28 +02005083 if (parser->completion)
5084 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005085
5086 return rc;
5087}
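
The validation loop above walks the user CB packet by packet: read the packet header, extract the opcode, look up the opcode's fixed size, verify the packet does not spill past the CB boundary, and reject opcodes the user may not submit. The standalone sketch below mirrors that walk over a plain byte buffer; the 5-bit opcode position, the opcode list and the size table are invented for the example and do not match the real Gaudi packet layout.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

enum { OP_NOP, OP_WREG32, OP_LIN_DMA, OP_MSG_PROT, OP_MAX };

/* assumed per-opcode packet sizes, in bytes */
static const size_t pkt_size[OP_MAX] = { 8, 8, 24, 16 };

static bool cb_walk_ok(const uint8_t *cb, size_t cb_size)
{
	size_t off = 0;

	while (off < cb_size) {
		uint64_t header;
		unsigned int op;

		if (cb_size - off < sizeof(header))
			return false;		/* truncated header */
		memcpy(&header, cb + off, sizeof(header));
		op = (header >> 56) & 0x1f;	/* assumed opcode position */
		if (op >= OP_MAX)
			return false;		/* unknown opcode */
		if (off + pkt_size[op] > cb_size)
			return false;		/* packet out of CB bounds */
		if (op == OP_MSG_PROT)
			return false;		/* privileged opcode */
		off += pkt_size[op];
	}
	return true;
}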
5088
5089static int gaudi_patch_dma_packet(struct hl_device *hdev,
5090 struct hl_cs_parser *parser,
5091 struct packet_lin_dma *user_dma_pkt,
5092 struct packet_lin_dma *new_dma_pkt,
5093 u32 *new_dma_pkt_size)
5094{
5095 struct hl_userptr *userptr;
5096 struct scatterlist *sg, *sg_next_iter;
5097 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5098 u64 len, len_next;
5099 dma_addr_t dma_addr, dma_addr_next;
5100 u64 device_memory_addr, addr;
5101 enum dma_data_direction dir;
5102 struct sg_table *sgt;
5103 bool src_in_host = false;
5104 bool skip_host_mem_pin = false;
5105 bool user_memset;
5106
5107 ctl = le32_to_cpu(user_dma_pkt->ctl);
5108
5109 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5110 src_in_host = true;
5111
5112 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5113 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5114
5115 if (src_in_host) {
5116 addr = le64_to_cpu(user_dma_pkt->src_addr);
5117 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5118 dir = DMA_TO_DEVICE;
5119 if (user_memset)
5120 skip_host_mem_pin = true;
5121 } else {
5122 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5123 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5124 dir = DMA_FROM_DEVICE;
5125 }
5126
5127 if ((!skip_host_mem_pin) &&
5128 (!hl_userptr_is_pinned(hdev, addr,
5129 le32_to_cpu(user_dma_pkt->tsize),
5130 parser->job_userptr_list, &userptr))) {
5131 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5132 addr, user_dma_pkt->tsize);
5133 return -EFAULT;
5134 }
5135
5136 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5137 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5138 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5139 return 0;
5140 }
5141
5142 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5143
5144 sgt = userptr->sgt;
5145 dma_desc_cnt = 0;
5146
5147 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5148 len = sg_dma_len(sg);
5149 dma_addr = sg_dma_address(sg);
5150
5151 if (len == 0)
5152 break;
5153
5154 while ((count + 1) < sgt->nents) {
5155 sg_next_iter = sg_next(sg);
5156 len_next = sg_dma_len(sg_next_iter);
5157 dma_addr_next = sg_dma_address(sg_next_iter);
5158
5159 if (len_next == 0)
5160 break;
5161
5162 if ((dma_addr + len == dma_addr_next) &&
5163 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5164 len += len_next;
5165 count++;
5166 sg = sg_next_iter;
5167 } else {
5168 break;
5169 }
5170 }
5171
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005172 ctl = le32_to_cpu(user_dma_pkt->ctl);
5173 if (likely(dma_desc_cnt))
5174 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5175 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5176 new_dma_pkt->ctl = cpu_to_le32(ctl);
5177 new_dma_pkt->tsize = cpu_to_le32(len);
5178
5179 if (dir == DMA_TO_DEVICE) {
5180 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5181 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5182 } else {
5183 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5184 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5185 }
5186
5187 if (!user_memset)
5188 device_memory_addr += len;
5189 dma_desc_cnt++;
5190 new_dma_pkt++;
5191 }
5192
5193 if (!dma_desc_cnt) {
5194 dev_err(hdev->dev,
5195 "Error of 0 SG entries when patching DMA packet\n");
5196 return -EFAULT;
5197 }
5198
5199 /* Fix the last dma packet - wrcomp must be as user set it */
5200 new_dma_pkt--;
5201 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5202
5203 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5204
5205 return 0;
5206}
5207
5208static int gaudi_patch_cb(struct hl_device *hdev,
5209 struct hl_cs_parser *parser)
5210{
5211 u32 cb_parsed_length = 0;
5212 u32 cb_patched_cur_length = 0;
5213 int rc = 0;
5214
5215	/* user_cb_size is more than 0 so the loop will always be executed */
5216 while (cb_parsed_length < parser->user_cb_size) {
5217 enum packet_id pkt_id;
5218 u16 pkt_size;
5219 u32 new_pkt_size = 0;
5220 struct gaudi_packet *user_pkt, *kernel_pkt;
5221
Arnd Bergmann82948e62020-10-26 17:08:06 +01005222 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5223 kernel_pkt = parser->patched_cb->kernel_address +
5224 cb_patched_cur_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005225
5226 pkt_id = (enum packet_id) (
5227 (le64_to_cpu(user_pkt->header) &
5228 PACKET_HEADER_PACKET_ID_MASK) >>
5229 PACKET_HEADER_PACKET_ID_SHIFT);
5230
Ofir Bittonbc75be22020-07-30 14:56:38 +03005231 if (!validate_packet_id(pkt_id)) {
5232 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5233 rc = -EINVAL;
5234 break;
5235 }
5236
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005237 pkt_size = gaudi_packet_sizes[pkt_id];
5238 cb_parsed_length += pkt_size;
5239 if (cb_parsed_length > parser->user_cb_size) {
5240 dev_err(hdev->dev,
5241 "packet 0x%x is out of CB boundary\n", pkt_id);
5242 rc = -EINVAL;
5243 break;
5244 }
5245
5246 switch (pkt_id) {
5247 case PACKET_LIN_DMA:
5248 rc = gaudi_patch_dma_packet(hdev, parser,
5249 (struct packet_lin_dma *) user_pkt,
5250 (struct packet_lin_dma *) kernel_pkt,
5251 &new_pkt_size);
5252 cb_patched_cur_length += new_pkt_size;
5253 break;
5254
5255 case PACKET_MSG_PROT:
5256 dev_err(hdev->dev,
5257 "User not allowed to use MSG_PROT\n");
5258 rc = -EPERM;
5259 break;
5260
5261 case PACKET_CP_DMA:
5262 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5263 rc = -EPERM;
5264 break;
5265
5266 case PACKET_STOP:
5267 dev_err(hdev->dev, "User not allowed to use STOP\n");
5268 rc = -EPERM;
5269 break;
5270
5271 case PACKET_WREG_32:
5272 case PACKET_WREG_BULK:
5273 case PACKET_MSG_LONG:
5274 case PACKET_MSG_SHORT:
5275 case PACKET_REPEAT:
5276 case PACKET_FENCE:
5277 case PACKET_NOP:
5278 case PACKET_ARB_POINT:
5279 case PACKET_LOAD_AND_EXE:
5280 memcpy(kernel_pkt, user_pkt, pkt_size);
5281 cb_patched_cur_length += pkt_size;
5282 break;
5283
5284 default:
5285 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5286 pkt_id);
5287 rc = -EINVAL;
5288 break;
5289 }
5290
5291 if (rc)
5292 break;
5293 }
5294
5295 return rc;
5296}
5297
5298static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5299 struct hl_cs_parser *parser)
5300{
5301 u64 patched_cb_handle;
5302 u32 patched_cb_size;
5303 struct hl_cb *user_cb;
5304 int rc;
5305
5306 /*
5307	 * The new CB should have space at the end for two MSG_PROT packets:
5308 * 1. A packet that will act as a completion packet
5309 * 2. A packet that will generate MSI interrupt
5310 */
Ofir Bittonac6fdbf2020-12-03 16:59:28 +02005311 if (parser->completion)
5312 parser->patched_cb_size = parser->user_cb_size +
5313 sizeof(struct packet_msg_prot) * 2;
5314 else
5315 parser->patched_cb_size = parser->user_cb_size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005316
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005317 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005318 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005319 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005320
5321 if (rc) {
5322 dev_err(hdev->dev,
5323 "Failed to allocate patched CB for DMA CS %d\n",
5324 rc);
5325 return rc;
5326 }
5327
5328 patched_cb_handle >>= PAGE_SHIFT;
5329 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5330 (u32) patched_cb_handle);
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005331 /* hl_cb_get should never fail */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005332 if (!parser->patched_cb) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005333 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5334 (u32) patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005335 rc = -EFAULT;
5336 goto out;
5337 }
5338
5339 /*
5340 * The check that parser->user_cb_size <= parser->user_cb->size was done
5341 * in validate_queue_index().
5342 */
Arnd Bergmann82948e62020-10-26 17:08:06 +01005343 memcpy(parser->patched_cb->kernel_address,
5344 parser->user_cb->kernel_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005345 parser->user_cb_size);
5346
5347 patched_cb_size = parser->patched_cb_size;
5348
5349 /* Validate patched CB instead of user CB */
5350 user_cb = parser->user_cb;
5351 parser->user_cb = parser->patched_cb;
5352 rc = gaudi_validate_cb(hdev, parser, true);
5353 parser->user_cb = user_cb;
5354
5355 if (rc) {
5356 hl_cb_put(parser->patched_cb);
5357 goto out;
5358 }
5359
5360 if (patched_cb_size != parser->patched_cb_size) {
5361 dev_err(hdev->dev, "user CB size mismatch\n");
5362 hl_cb_put(parser->patched_cb);
5363 rc = -EINVAL;
5364 goto out;
5365 }
5366
5367out:
5368 /*
5369 * Always call cb destroy here because we still have 1 reference
5370	 * to it by calling cb_get earlier. After the job is completed,
5371 * cb_put will release it, but here we want to remove it from the
5372 * idr
5373 */
5374 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5375 patched_cb_handle << PAGE_SHIFT);
5376
5377 return rc;
5378}
5379
5380static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5381 struct hl_cs_parser *parser)
5382{
5383 u64 patched_cb_handle;
5384 int rc;
5385
5386 rc = gaudi_validate_cb(hdev, parser, false);
5387
5388 if (rc)
5389 goto free_userptr;
5390
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005391 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005392 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005393 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005394 if (rc) {
5395 dev_err(hdev->dev,
5396 "Failed to allocate patched CB for DMA CS %d\n", rc);
5397 goto free_userptr;
5398 }
5399
5400 patched_cb_handle >>= PAGE_SHIFT;
5401 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5402 (u32) patched_cb_handle);
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005403 /* hl_cb_get should never fail here */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005404 if (!parser->patched_cb) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005405 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5406 (u32) patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005407 rc = -EFAULT;
5408 goto out;
5409 }
5410
5411 rc = gaudi_patch_cb(hdev, parser);
5412
5413 if (rc)
5414 hl_cb_put(parser->patched_cb);
5415
5416out:
5417 /*
5418 * Always call cb destroy here because we still have 1 reference
5419	 * to it by calling cb_get earlier. After the job is completed,
5420 * cb_put will release it, but here we want to remove it from the
5421 * idr
5422 */
5423 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5424 patched_cb_handle << PAGE_SHIFT);
5425
5426free_userptr:
5427 if (rc)
5428 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5429 return rc;
5430}
5431
5432static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5433 struct hl_cs_parser *parser)
5434{
5435 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
Oded Gabbay3c681572020-11-02 21:10:39 +02005436 struct gaudi_device *gaudi = hdev->asic_specific;
5437 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5438 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5439
5440 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5441 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5442 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5443 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5444 parser->hw_queue_id);
5445 return -EINVAL;
5446 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005447
5448 /* For internal queue jobs just check if CB address is valid */
5449 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5450 parser->user_cb_size,
5451 asic_prop->sram_user_base_address,
5452 asic_prop->sram_end_address))
5453 return 0;
5454
5455 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5456 parser->user_cb_size,
5457 asic_prop->dram_user_base_address,
5458 asic_prop->dram_end_address))
5459 return 0;
5460
5461 /* PMMU and HPMMU addresses are equal, check only one of them */
5462 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5463 parser->user_cb_size,
5464 asic_prop->pmmu.start_addr,
5465 asic_prop->pmmu.end_addr))
5466 return 0;
5467
5468 dev_err(hdev->dev,
5469 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5470 parser->user_cb, parser->user_cb_size);
5471
5472 return -EFAULT;
5473}
5474
5475static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5476{
5477 struct gaudi_device *gaudi = hdev->asic_specific;
5478
5479 if (parser->queue_type == QUEUE_TYPE_INT)
5480 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5481
5482 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5483 return gaudi_parse_cb_mmu(hdev, parser);
5484 else
5485 return gaudi_parse_cb_no_mmu(hdev, parser);
5486}
5487
5488static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
Arnd Bergmann82948e62020-10-26 17:08:06 +01005489 void *kernel_address, u32 len,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005490 u64 cq_addr, u32 cq_val, u32 msi_vec,
5491 bool eb)
5492{
5493 struct gaudi_device *gaudi = hdev->asic_specific;
5494 struct packet_msg_prot *cq_pkt;
5495 u32 tmp;
5496
Arnd Bergmann82948e62020-10-26 17:08:06 +01005497 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005498
Oded Gabbay65887292020-08-12 11:21:01 +03005499 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5500 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005501
5502 if (eb)
Oded Gabbay65887292020-08-12 11:21:01 +03005503 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005504
5505 cq_pkt->ctl = cpu_to_le32(tmp);
5506 cq_pkt->value = cpu_to_le32(cq_val);
5507 cq_pkt->addr = cpu_to_le64(cq_addr);
5508
5509 cq_pkt++;
5510
Oded Gabbay65887292020-08-12 11:21:01 +03005511 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5512 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005513 cq_pkt->ctl = cpu_to_le32(tmp);
5514 cq_pkt->value = cpu_to_le32(1);
5515
5516 if (!gaudi->multi_msi_mode)
5517 msi_vec = 0;
5518
5519 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5520}
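
The ctl words of these completion packets are assembled with FIELD_PREP(), which places a value into the bit-field described by a contiguous mask. A user-space equivalent is sketched below; the two masks are made up for the example, and __builtin_ctz() stands in for the kernel's mask-based shift computation.

#include <stdint.h>

#define EXAMPLE_OPCODE_MASK	0x1f000000u	/* invented masks */
#define EXAMPLE_EB_MASK		0x00000001u

/* shift val into the position of the lowest set bit of mask, then clamp */
static inline uint32_t field_prep32(uint32_t mask, uint32_t val)
{
	return (val << __builtin_ctz(mask)) & mask;
}

/* ctl word with an example opcode 0x1c and the engine-barrier bit set */
static uint32_t example_ctl(void)
{
	return field_prep32(EXAMPLE_OPCODE_MASK, 0x1c) |
	       field_prep32(EXAMPLE_EB_MASK, 1);
}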
5521
5522static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5523{
5524 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5525}
5526
5527static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5528 u32 size, u64 val)
5529{
5530 struct packet_lin_dma *lin_dma_pkt;
5531 struct hl_cs_job *job;
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005532 u32 cb_size, ctl, err_cause;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005533 struct hl_cb *cb;
5534 int rc;
5535
Ofir Bittona04b7cd2020-07-13 13:36:55 +03005536 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005537 if (!cb)
5538 return -EFAULT;
5539
Arnd Bergmann82948e62020-10-26 17:08:06 +01005540 lin_dma_pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005541 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5542 cb_size = sizeof(*lin_dma_pkt);
5543
Oded Gabbay65887292020-08-12 11:21:01 +03005544 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5545 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5546 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5547 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5548 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5549
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005550 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5551 lin_dma_pkt->src_addr = cpu_to_le64(val);
5552 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5553 lin_dma_pkt->tsize = cpu_to_le32(size);
5554
5555 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5556 if (!job) {
5557 dev_err(hdev->dev, "Failed to allocate a new job\n");
5558 rc = -ENOMEM;
5559 goto release_cb;
5560 }
5561
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005562 /* Verify DMA is OK */
5563 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5564 if (err_cause && !hdev->init_done) {
5565 dev_dbg(hdev->dev,
5566 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5567 err_cause);
5568 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5569 }
5570
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005571 job->id = 0;
5572 job->user_cb = cb;
Tomer Tayarf07486742020-08-02 22:51:31 +03005573 atomic_inc(&job->user_cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005574 job->user_cb_size = cb_size;
5575 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5576 job->patched_cb = job->user_cb;
5577 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5578
5579 hl_debugfs_add_job(hdev, job);
5580
5581 rc = gaudi_send_job_on_qman0(hdev, job);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005582 hl_debugfs_remove_job(hdev, job);
5583 kfree(job);
Tomer Tayarf07486742020-08-02 22:51:31 +03005584 atomic_dec(&cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005585
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005586 /* Verify DMA is OK */
5587 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5588 if (err_cause) {
5589 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5590 rc = -EIO;
5591 if (!hdev->init_done) {
5592 dev_dbg(hdev->dev,
5593 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5594 err_cause);
5595 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5596 }
5597 }
5598
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005599release_cb:
5600 hl_cb_put(cb);
5601 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5602
5603 return rc;
5604}
5605
Ofir Bitton423815b2021-01-05 09:04:07 +02005606static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5607 u32 num_regs, u32 val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005608{
Ofir Bitton423815b2021-01-05 09:04:07 +02005609 struct packet_msg_long *pkt;
5610 struct hl_cs_job *job;
5611 u32 cb_size, ctl;
5612 struct hl_cb *cb;
5613 int i, rc;
5614
5615 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5616
5617 if (cb_size > SZ_2M) {
5618		dev_err(hdev->dev, "CB size must be smaller than %u bytes", SZ_2M);
5619 return -ENOMEM;
5620 }
5621
5622 cb = hl_cb_kernel_create(hdev, cb_size, false);
5623 if (!cb)
5624 return -EFAULT;
5625
5626 pkt = cb->kernel_address;
5627
5628 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5629 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5630 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5631 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5632 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5633
5634 for (i = 0; i < num_regs ; i++, pkt++) {
5635 pkt->ctl = cpu_to_le32(ctl);
5636 pkt->value = cpu_to_le32(val);
5637 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5638 }
5639
5640 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5641 if (!job) {
5642 dev_err(hdev->dev, "Failed to allocate a new job\n");
5643 rc = -ENOMEM;
5644 goto release_cb;
5645 }
5646
5647 job->id = 0;
5648 job->user_cb = cb;
5649 atomic_inc(&job->user_cb->cs_cnt);
5650 job->user_cb_size = cb_size;
5651 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5652 job->patched_cb = job->user_cb;
5653 job->job_cb_size = cb_size;
5654
5655 hl_debugfs_add_job(hdev, job);
5656
5657 rc = gaudi_send_job_on_qman0(hdev, job);
5658 hl_debugfs_remove_job(hdev, job);
5659 kfree(job);
5660 atomic_dec(&cb->cs_cnt);
5661
5662release_cb:
5663 hl_cb_put(cb);
5664 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5665
5666 return rc;
5667}
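
gaudi_memset_registers() sizes its kernel CB as one MSG_LONG packet per register plus a trailing MSG_PROT, and refuses anything above 2 MB. The quick arithmetic check below illustrates that rule with assumed 16-byte packet sizes and an example register count; the real structure sizes and block sizes may differ.

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	size_t msg_long = 16, msg_prot = 16;	/* assumed packet sizes */
	size_t cb_limit = 2 * 1024 * 1024;	/* the SZ_2M cap */
	size_t num_regs = 2048;			/* e.g. one sync-object block */
	size_t cb_size = num_regs * msg_long + msg_prot;

	printf("cb_size = %zu bytes, fits under limit: %d\n",
	       cb_size, cb_size <= cb_limit);
	return 0;
}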
5668
5669static int gaudi_schedule_register_memset(struct hl_device *hdev,
5670 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5671{
5672 struct hl_ctx *ctx = hdev->compute_ctx;
5673 struct hl_pending_cb *pending_cb;
5674 struct packet_msg_long *pkt;
5675 u32 cb_size, ctl;
5676 struct hl_cb *cb;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005677 int i;
5678
Ofir Bitton423815b2021-01-05 09:04:07 +02005679	/* If no compute context is available, or the context is going down,
5680	 * memset the registers directly
5681 */
5682 if (!ctx || kref_read(&ctx->refcount) == 0)
5683 return gaudi_memset_registers(hdev, reg_base, num_regs, val);
5684
5685 cb_size = (sizeof(*pkt) * num_regs) +
5686 sizeof(struct packet_msg_prot) * 2;
5687
5688 if (cb_size > SZ_2M) {
5689		dev_err(hdev->dev, "CB size must be smaller than %u bytes", SZ_2M);
5690 return -ENOMEM;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005691 }
5692
Ofir Bitton423815b2021-01-05 09:04:07 +02005693 pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5694 if (!pending_cb)
5695 return -ENOMEM;
5696
5697 cb = hl_cb_kernel_create(hdev, cb_size, false);
5698 if (!cb) {
5699 kfree(pending_cb);
5700 return -EFAULT;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005701 }
5702
Ofir Bitton423815b2021-01-05 09:04:07 +02005703 pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005704
Ofir Bitton423815b2021-01-05 09:04:07 +02005705 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5706 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5707 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5708 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5709 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005710
Ofir Bitton423815b2021-01-05 09:04:07 +02005711 for (i = 0; i < num_regs ; i++, pkt++) {
5712 pkt->ctl = cpu_to_le32(ctl);
5713 pkt->value = cpu_to_le32(val);
5714 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5715 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005716
Ofir Bitton423815b2021-01-05 09:04:07 +02005717 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5718
5719 pending_cb->cb = cb;
5720 pending_cb->cb_size = cb_size;
5721 /* The queue ID MUST be an external queue ID. Otherwise, we will
5722 * have undefined behavior
5723 */
5724 pending_cb->hw_queue_id = hw_queue_id;
5725
5726 spin_lock(&ctx->pending_cb_lock);
5727 list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5728 spin_unlock(&ctx->pending_cb_lock);
5729
5730 return 0;
5731}
5732
5733static int gaudi_restore_sm_registers(struct hl_device *hdev)
5734{
5735 u64 base_addr;
5736 u32 num_regs;
5737 int rc;
5738
5739 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5740 num_regs = NUM_OF_SOB_IN_BLOCK;
5741 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5742 if (rc) {
5743 dev_err(hdev->dev, "failed resetting SM registers");
5744 return -ENOMEM;
5745 }
5746
5747 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5748 num_regs = NUM_OF_SOB_IN_BLOCK;
5749 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5750 if (rc) {
5751 dev_err(hdev->dev, "failed resetting SM registers");
5752 return -ENOMEM;
5753 }
5754
5755 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5756 num_regs = NUM_OF_SOB_IN_BLOCK;
5757 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5758 if (rc) {
5759 dev_err(hdev->dev, "failed resetting SM registers");
5760 return -ENOMEM;
5761 }
5762
5763 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5764 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5765 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5766 if (rc) {
5767 dev_err(hdev->dev, "failed resetting SM registers");
5768 return -ENOMEM;
5769 }
5770
5771 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5772 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5773 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5774 if (rc) {
5775 dev_err(hdev->dev, "failed resetting SM registers");
5776 return -ENOMEM;
5777 }
5778
5779 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5780 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5781 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5782 if (rc) {
5783 dev_err(hdev->dev, "failed resetting SM registers");
5784 return -ENOMEM;
5785 }
5786
5787 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5788 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5789 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5790 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5791 if (rc) {
5792 dev_err(hdev->dev, "failed resetting SM registers");
5793 return -ENOMEM;
5794 }
5795
5796 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5797 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5798 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5799 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5800 if (rc) {
5801 dev_err(hdev->dev, "failed resetting SM registers");
5802 return -ENOMEM;
5803 }
5804
5805 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005806}
5807
5808static void gaudi_restore_dma_registers(struct hl_device *hdev)
5809{
5810 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5811 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5812 int i;
5813
5814 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5815 u64 sob_addr = CFG_BASE +
5816 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5817 (i * sob_delta);
5818 u32 dma_offset = i * DMA_CORE_OFFSET;
5819
5820 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5821 lower_32_bits(sob_addr));
5822 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5823 upper_32_bits(sob_addr));
5824 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5825
5826 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5827 * modified by the user for SRAM reduction
5828 */
5829 if (i > 1)
5830 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5831 0x00000001);
5832 }
5833}
5834
5835static void gaudi_restore_qm_registers(struct hl_device *hdev)
5836{
5837 u32 qman_offset;
5838 int i;
5839
5840 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5841 qman_offset = i * DMA_QMAN_OFFSET;
5842 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5843 }
5844
5845 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5846 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5847 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5848 }
5849
5850 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5851 qman_offset = i * TPC_QMAN_OFFSET;
5852 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5853 }
Oded Gabbay3c681572020-11-02 21:10:39 +02005854
5855 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5856 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5857 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5858 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5859 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005860}
5861
Ofir Bitton423815b2021-01-05 09:04:07 +02005862static int gaudi_restore_user_registers(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005863{
Ofir Bitton423815b2021-01-05 09:04:07 +02005864 int rc;
5865
5866 rc = gaudi_restore_sm_registers(hdev);
5867 if (rc)
5868 return rc;
5869
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005870 gaudi_restore_dma_registers(hdev);
5871 gaudi_restore_qm_registers(hdev);
Ofir Bitton423815b2021-01-05 09:04:07 +02005872
5873 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005874}
5875
5876static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5877{
Ofir Bitton423815b2021-01-05 09:04:07 +02005878 return gaudi_restore_user_registers(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005879}
5880
5881static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5882{
5883 struct asic_fixed_properties *prop = &hdev->asic_prop;
5884 struct gaudi_device *gaudi = hdev->asic_specific;
5885 u64 addr = prop->mmu_pgt_addr;
5886 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
5887
5888 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5889 return 0;
5890
5891 return gaudi_memset_device_memory(hdev, addr, size, 0);
5892}
5893
5894static void gaudi_restore_phase_topology(struct hl_device *hdev)
5895{
5896
5897}
5898
5899static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
5900{
5901 struct asic_fixed_properties *prop = &hdev->asic_prop;
5902 struct gaudi_device *gaudi = hdev->asic_specific;
5903 u64 hbm_bar_addr;
5904 int rc = 0;
5905
5906 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005907
5908 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5909 (hdev->clock_gating_mask &
5910 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5911
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005912 dev_err_ratelimited(hdev->dev,
5913 "Can't read register - clock gating is enabled!\n");
5914 rc = -EFAULT;
5915 } else {
5916 *val = RREG32(addr - CFG_BASE);
5917 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005918
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005919 } else if ((addr >= SRAM_BASE_ADDR) &&
5920 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5921 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
5922 (addr - SRAM_BASE_ADDR));
5923 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5924 u64 bar_base_addr = DRAM_PHYS_BASE +
5925 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5926
5927 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5928 if (hbm_bar_addr != U64_MAX) {
5929 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
5930 (addr - bar_base_addr));
5931
5932 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5933 hbm_bar_addr);
5934 }
5935 if (hbm_bar_addr == U64_MAX)
5936 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005937 } else {
5938 rc = -EFAULT;
5939 }
5940
5941 return rc;
5942}
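
The debugfs accessors above (and the three that follow) all route an address the same way: configuration-space addresses become register accesses through the CFG window, SRAM addresses go through the SRAM BAR, and DRAM/HBM addresses are reached by sliding the HBM BAR to the enclosing window first. A simplified, self-contained routing helper is sketched below; the base addresses and sizes are placeholders rather than the real Gaudi memory map.

#include <stdint.h>

enum region { REG_CFG, REG_SRAM, REG_DRAM, REG_INVALID };

struct route {
	enum region region;
	uint64_t offset;	/* offset inside the selected window */
};

static struct route route_addr(uint64_t addr)
{
	/* placeholder bases/sizes for the example only */
	const uint64_t cfg_base  = 0x7ffc000000ull, cfg_size  = 0x4000000;
	const uint64_t sram_base = 0x7ff0000000ull, sram_size = 0x1400000;
	const uint64_t dram_base = 0x0,             dram_size = 32ull << 30;

	if (addr >= cfg_base && addr < cfg_base + cfg_size)
		return (struct route){ REG_CFG, addr - cfg_base };
	if (addr >= sram_base && addr < sram_base + sram_size)
		return (struct route){ REG_SRAM, addr - sram_base };
	if (addr >= dram_base && addr < dram_base + dram_size)
		return (struct route){ REG_DRAM, addr - dram_base };
	return (struct route){ REG_INVALID, 0 };
}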
5943
5944static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
5945{
5946 struct asic_fixed_properties *prop = &hdev->asic_prop;
5947 struct gaudi_device *gaudi = hdev->asic_specific;
5948 u64 hbm_bar_addr;
5949 int rc = 0;
5950
5951 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005952
5953 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5954 (hdev->clock_gating_mask &
5955 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5956
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005957 dev_err_ratelimited(hdev->dev,
5958 "Can't write register - clock gating is enabled!\n");
5959 rc = -EFAULT;
5960 } else {
5961 WREG32(addr - CFG_BASE, val);
5962 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005963
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005964 } else if ((addr >= SRAM_BASE_ADDR) &&
5965 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5966 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
5967 (addr - SRAM_BASE_ADDR));
5968 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5969 u64 bar_base_addr = DRAM_PHYS_BASE +
5970 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5971
5972 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5973 if (hbm_bar_addr != U64_MAX) {
5974 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
5975 (addr - bar_base_addr));
5976
5977 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5978 hbm_bar_addr);
5979 }
5980 if (hbm_bar_addr == U64_MAX)
5981 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005982 } else {
5983 rc = -EFAULT;
5984 }
5985
5986 return rc;
5987}
5988
5989static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
5990{
5991 struct asic_fixed_properties *prop = &hdev->asic_prop;
5992 struct gaudi_device *gaudi = hdev->asic_specific;
5993 u64 hbm_bar_addr;
5994 int rc = 0;
5995
5996 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005997
5998 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5999 (hdev->clock_gating_mask &
6000 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6001
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006002 dev_err_ratelimited(hdev->dev,
6003 "Can't read register - clock gating is enabled!\n");
6004 rc = -EFAULT;
6005 } else {
6006 u32 val_l = RREG32(addr - CFG_BASE);
6007 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6008
6009 *val = (((u64) val_h) << 32) | val_l;
6010 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006011
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006012 } else if ((addr >= SRAM_BASE_ADDR) &&
6013 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6014 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6015 (addr - SRAM_BASE_ADDR));
6016 } else if (addr <=
6017 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6018 u64 bar_base_addr = DRAM_PHYS_BASE +
6019 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6020
6021 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6022 if (hbm_bar_addr != U64_MAX) {
6023 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6024 (addr - bar_base_addr));
6025
6026 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6027 hbm_bar_addr);
6028 }
6029 if (hbm_bar_addr == U64_MAX)
6030 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006031 } else {
6032 rc = -EFAULT;
6033 }
6034
6035 return rc;
6036}
6037
6038static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
6039{
6040 struct asic_fixed_properties *prop = &hdev->asic_prop;
6041 struct gaudi_device *gaudi = hdev->asic_specific;
6042 u64 hbm_bar_addr;
6043 int rc = 0;
6044
6045 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006046
6047 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6048 (hdev->clock_gating_mask &
6049 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6050
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006051 dev_err_ratelimited(hdev->dev,
6052 "Can't write register - clock gating is enabled!\n");
6053 rc = -EFAULT;
6054 } else {
6055 WREG32(addr - CFG_BASE, lower_32_bits(val));
6056 WREG32(addr + sizeof(u32) - CFG_BASE,
6057 upper_32_bits(val));
6058 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006059
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006060 } else if ((addr >= SRAM_BASE_ADDR) &&
6061 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6062 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6063 (addr - SRAM_BASE_ADDR));
6064 } else if (addr <=
6065 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6066 u64 bar_base_addr = DRAM_PHYS_BASE +
6067 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6068
6069 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6070 if (hbm_bar_addr != U64_MAX) {
6071 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6072 (addr - bar_base_addr));
6073
6074 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6075 hbm_bar_addr);
6076 }
6077 if (hbm_bar_addr == U64_MAX)
6078 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006079 } else {
6080 rc = -EFAULT;
6081 }
6082
6083 return rc;
6084}
6085
6086static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6087{
6088 struct gaudi_device *gaudi = hdev->asic_specific;
6089
6090 if (hdev->hard_reset_pending)
6091 return U64_MAX;
6092
6093 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6094 (addr - gaudi->hbm_bar_cur_addr));
6095}
6096
6097static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6098{
6099 struct gaudi_device *gaudi = hdev->asic_specific;
6100
6101 if (hdev->hard_reset_pending)
6102 return;
6103
6104 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6105 (addr - gaudi->hbm_bar_cur_addr));
6106}
6107
Ofir Bitton1137e1e2020-09-30 18:43:52 +03006108void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006109{
6110 /* mask to zero the MMBP and ASID bits */
6111 WREG32_AND(reg, ~0x7FF);
6112 WREG32_OR(reg, asid);
6113}
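
gaudi_mmu_prepare_reg() is a read-modify-write of the low 11 bits of each user/non-secure properties register: it clears the MMBP bit together with the ASID field and installs the new ASID. The same update expressed as a pure function is shown below, assuming bit 10 is MMBP and bits 9:0 hold the ASID; that field split is an assumption made for the example.

#include <stdint.h>

static inline uint32_t install_asid(uint32_t reg_val, uint32_t asid)
{
	reg_val &= ~0x7FFu;		/* zero MMBP and ASID (bits 10:0) */
	reg_val |= (asid & 0x3FFu);	/* new ASID, MMBP stays cleared */
	return reg_val;
}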
6114
6115static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6116{
6117 struct gaudi_device *gaudi = hdev->asic_specific;
6118
6119 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6120 return;
6121
6122 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02006123 dev_crit(hdev->dev, "asid %u is too big\n", asid);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006124 return;
6125 }
6126
6127 mutex_lock(&gaudi->clk_gate_mutex);
6128
6129 hdev->asic_funcs->disable_clock_gating(hdev);
6130
6131 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6132 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6133 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6134 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6135 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6136
6137 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6138 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6139 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6140 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6141 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6142
6143 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6144 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6145 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6146 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6147 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6148
6149 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6150 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6151 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6152 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6153 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6154
6155 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6156 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6157 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6158 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6159 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6160
6161 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6162 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6163 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6164 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6165 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6166
6167 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6168 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6169 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6170 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6171 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6172
6173 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6174 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6175 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6176 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6177 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6178
6179 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6180 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6181 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6182 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6183 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6184 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6185 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6186 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6187
6188 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6189 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6190 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6191 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6192 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6193 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6194 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6195
6196 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6197 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6198 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6199 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6200 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6201 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6202 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6203
6204 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6205 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6206 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6207 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6208 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6209 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6210 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6211
6212 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6213 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6214 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6215 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6216 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6217 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6218 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6219
6220 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6221 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6222 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6223 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6224 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6225 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6226 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6227
6228 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6229 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6230 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6231 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6232 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6233 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6234 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6235
6236 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6237 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6238 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6239 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6240 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6241 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6242 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6243
6244 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6245 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6246 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6247 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6248 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6249 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6250 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6251
6252 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6253 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6254 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6255 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6256 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6257 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6258 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6259 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6260 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6261 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6262
6263 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6264 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6265 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6266 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6267 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6268 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6269 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6270 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6271 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6272 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6273 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6274 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6275
Oded Gabbay3c681572020-11-02 21:10:39 +02006276 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
6277 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6278 asid);
6279 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6280 asid);
6281 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6282 asid);
6283 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6284 asid);
6285 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6286 asid);
6287 }
6288
6289 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
6290 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6291 asid);
6292 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6293 asid);
6294 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6295 asid);
6296 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6297 asid);
6298 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6299 asid);
6300 }
6301
6302 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
6303 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6304 asid);
6305 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6306 asid);
6307 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6308 asid);
6309 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6310 asid);
6311 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6312 asid);
6313 }
6314
6315 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
6316 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6317 asid);
6318 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6319 asid);
6320 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6321 asid);
6322 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6323 asid);
6324 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6325 asid);
6326 }
6327
6328 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
6329 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6330 asid);
6331 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6332 asid);
6333 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6334 asid);
6335 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6336 asid);
6337 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6338 asid);
6339 }
6340
6341 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
6342 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6343 asid);
6344 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6345 asid);
6346 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6347 asid);
6348 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6349 asid);
6350 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6351 asid);
6352 }
6353
6354 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
6355 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6356 asid);
6357 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6358 asid);
6359 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6360 asid);
6361 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6362 asid);
6363 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6364 asid);
6365 }
6366
6367 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
6368 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6369 asid);
6370 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6371 asid);
6372 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6373 asid);
6374 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6375 asid);
6376 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6377 asid);
6378 }
6379
6380 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
6381 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6382 asid);
6383 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6384 asid);
6385 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6386 asid);
6387 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6388 asid);
6389 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6390 asid);
6391 }
6392
6393 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
6394 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6395 asid);
6396 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6397 asid);
6398 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6399 asid);
6400 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6401 asid);
6402 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6403 asid);
6404 }
6405
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006406 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006407
6408 mutex_unlock(&gaudi->clk_gate_mutex);
6409}
6410
6411static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6412 struct hl_cs_job *job)
6413{
6414 struct packet_msg_prot *fence_pkt;
6415 u32 *fence_ptr;
6416 dma_addr_t fence_dma_addr;
6417 struct hl_cb *cb;
6418 u32 tmp, timeout, dma_offset;
6419 int rc;
6420
6421 if (hdev->pldm)
6422 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6423 else
6424 timeout = HL_DEVICE_TIMEOUT_USEC;
6425
6426 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
6427 dev_err_ratelimited(hdev->dev,
6428 "Can't send driver job on QMAN0 because the device is not idle\n");
6429 return -EBUSY;
6430 }
6431
6432 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6433 &fence_dma_addr);
6434 if (!fence_ptr) {
6435 dev_err(hdev->dev,
6436 "Failed to allocate fence memory for QMAN0\n");
6437 return -ENOMEM;
6438 }
6439
6440 cb = job->patched_cb;
6441
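	/*
	 * Descriptive note (derived from the code below): the driver fills the
	 * last packet-sized slot of the patched CB with a MSG_PROT packet.
	 * Once QMAN0 executes it, the packet writes GAUDI_QMAN0_FENCE_VAL to
	 * the fence buffer that is polled further down to detect completion.
	 */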
Arnd Bergmann82948e62020-10-26 17:08:06 +01006442 fence_pkt = cb->kernel_address +
6443 job->job_cb_size - sizeof(struct packet_msg_prot);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006444
Oded Gabbay65887292020-08-12 11:21:01 +03006445 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6446 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6447 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6448
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006449 fence_pkt->ctl = cpu_to_le32(tmp);
6450 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6451 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6452
6453 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6454
6455 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6456
6457 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6458 job->job_cb_size, cb->bus_address);
6459 if (rc) {
6460 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6461 goto free_fence_ptr;
6462 }
6463
6464 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6465 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6466 timeout, true);
6467
6468 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6469
6470 if (rc == -ETIMEDOUT) {
6471 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6472 goto free_fence_ptr;
6473 }
6474
6475free_fence_ptr:
6476 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6477 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6478
6479 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6480 fence_dma_addr);
6481 return rc;
6482}
6483
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006484static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6485{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006486 if (event_type >= GAUDI_EVENT_SIZE)
6487 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006488
Ofir Bittonebd8d122020-05-10 13:41:28 +03006489 if (!gaudi_irq_map_table[event_type].valid)
6490 goto event_not_supported;
6491
6492	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6493
6494 return;
6495
6496event_not_supported:
6497 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006498}
6499
6500static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6501 u32 x_y, bool is_write)
6502{
6503 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6504
6505 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6506 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6507
6508 switch (x_y) {
6509 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6510 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6511 dma_id[0] = 0;
6512 dma_id[1] = 2;
6513 break;
6514 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6515 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6516 dma_id[0] = 1;
6517 dma_id[1] = 3;
6518 break;
6519 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6520 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6521 dma_id[0] = 4;
6522 dma_id[1] = 6;
6523 break;
6524 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6525 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6526 dma_id[0] = 5;
6527 dma_id[1] = 7;
6528 break;
6529 default:
6530 goto unknown_initiator;
6531 }
6532
6533 for (i = 0 ; i < 2 ; i++) {
6534 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6535 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6536 }
6537
6538 switch (x_y) {
6539 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6540 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6541 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6542 return "DMA0";
6543 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6544 return "DMA2";
6545 else
6546 return "DMA0 or DMA2";
6547 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6548 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6549 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6550 return "DMA1";
6551 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6552 return "DMA3";
6553 else
6554 return "DMA1 or DMA3";
6555 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6556 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6557 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6558 return "DMA4";
6559 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6560 return "DMA6";
6561 else
6562 return "DMA4 or DMA6";
6563 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6564 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6565 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6566 return "DMA5";
6567 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6568 return "DMA7";
6569 else
6570 return "DMA5 or DMA7";
6571 }
6572
6573unknown_initiator:
6574 return "unknown initiator";
6575}
6576
6577static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6578 bool is_write)
6579{
6580 u32 val, x_y, axi_id;
6581
6582 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6583 RREG32(mmMMU_UP_RAZWI_READ_ID);
6584 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6585 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6586 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6587 RAZWI_INITIATOR_AXI_ID_SHIFT);
6588
6589 switch (x_y) {
6590 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6591 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6592 return "TPC0";
6593 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6594 return "NIC0";
6595 break;
6596 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6597 return "TPC1";
6598 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6599 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6600 return "MME0";
6601 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6602 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6603 return "MME1";
6604 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6605 return "TPC2";
6606 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6607 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6608 return "TPC3";
6609 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6610 return "PCI";
6611 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6612 return "CPU";
6613 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6614 return "PSOC";
6615 break;
6616 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6617 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6618 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6619 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6620 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6621 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6622 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6623 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6624 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6625 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6626 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6627 return "TPC4";
6628 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6629 return "NIC1";
6630 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6631 return "NIC2";
6632 break;
6633 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6634 return "TPC5";
6635 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6636 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6637 return "MME2";
6638 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6639 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6640 return "MME3";
6641 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6642 return "TPC6";
6643 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6644 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6645 return "TPC7";
6646 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6647 return "NIC4";
6648 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6649 return "NIC5";
6650 break;
6651 default:
6652 break;
6653 }
6654
6655 dev_err(hdev->dev,
6656 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6657 val,
6658 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6659 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6660 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6661 RAZWI_INITIATOR_AXI_ID_MASK);
6662
6663 return "unknown initiator";
6664}
6665
6666static void gaudi_print_razwi_info(struct hl_device *hdev)
6667{
6668 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6669 dev_err_ratelimited(hdev->dev,
6670 "RAZWI event caused by illegal write of %s\n",
6671 gaudi_get_razwi_initiator_name(hdev, true));
6672 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6673 }
6674
6675 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6676 dev_err_ratelimited(hdev->dev,
6677 "RAZWI event caused by illegal read of %s\n",
6678 gaudi_get_razwi_initiator_name(hdev, false));
6679 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6680 }
6681}
6682
6683static void gaudi_print_mmu_error_info(struct hl_device *hdev)
6684{
6685 struct gaudi_device *gaudi = hdev->asic_specific;
6686 u64 addr;
6687 u32 val;
6688
6689 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6690 return;
6691
6692 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6693 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6694 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6695 addr <<= 32;
6696 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6697
6698 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
6699 addr);
6700
6701 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6702 }
6703
6704 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6705 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6706 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6707 addr <<= 32;
6708 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6709
6710 dev_err_ratelimited(hdev->dev,
6711 "MMU access error on va 0x%llx\n", addr);
6712
6713 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6714 }
6715}
6716
6717/*
6718 * +-------------------+------------------------------------------------------+
6719 * | Configuration Reg | Description |
6720 * | Address | |
6721 * +-------------------+------------------------------------------------------+
6722 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6723 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6724 * | |0xF34 memory wrappers 63:32 |
6725 * | |0xF38 memory wrappers 95:64 |
6726 * | |0xF3C memory wrappers 127:96 |
6727 * +-------------------+------------------------------------------------------+
6728 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6729 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6730 * | |0xF44 memory wrappers 63:32 |
6731 * | |0xF48 memory wrappers 95:64 |
6732 * | |0xF4C memory wrappers 127:96 |
6733 * +-------------------+------------------------------------------------------+
6734 */
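/*
 * Worked example (a sketch derived from the extraction loop below, not from
 * the H/W spec): with 90 memories there are 3 indication registers; if only
 * bit 8 of the second single-error register (0xF34, wrappers 63:32) is set,
 * the failing wrapper index is err_bit + (32 * i) = 8 + 32 = 40.
 */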
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006735static int gaudi_extract_ecc_info(struct hl_device *hdev,
6736 struct ecc_info_extract_params *params, u64 *ecc_address,
6737 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006738{
6739 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006740 u32 i, num_mem_regs, reg, err_bit;
6741 u64 err_addr, err_word = 0;
6742 int rc = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006743
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006744 num_mem_regs = params->num_memories / 32 +
6745 ((params->num_memories % 32) ? 1 : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006746
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006747 if (params->block_address >= CFG_BASE)
6748 params->block_address -= CFG_BASE;
6749
6750 if (params->derr)
6751 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006752 else
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006753 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006754
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006755 if (params->disable_clock_gating) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006756 mutex_lock(&gaudi->clk_gate_mutex);
6757 hdev->asic_funcs->disable_clock_gating(hdev);
6758 }
6759
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006760 /* Set invalid wrapper index */
6761 *memory_wrapper_idx = 0xFF;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006762
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006763 /* Iterate through memory wrappers, a single bit must be set */
Dan Carpenterb0353542020-08-05 12:51:05 +03006764 for (i = 0 ; i < num_mem_regs ; i++) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006765		/* Indication registers are consecutive 32-bit words */
6766		err_word = RREG32(err_addr + i * 4);
6767 if (err_word) {
6768 err_bit = __ffs(err_word);
6769 *memory_wrapper_idx = err_bit + (32 * i);
6770 break;
6771 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006772 }
6773
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006774 if (*memory_wrapper_idx == 0xFF) {
6775 dev_err(hdev->dev, "ECC error information cannot be found\n");
6776 rc = -EINVAL;
6777 goto enable_clk_gate;
6778 }
6779
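	/*
	 * Select the failing wrapper so that the address and syndrome read
	 * below refer to it
	 */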
6780 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6781 *memory_wrapper_idx);
6782
6783 *ecc_address =
6784 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6785 *ecc_syndrom =
6786 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6787
6788 /* Clear error indication */
6789 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6790 if (params->derr)
6791 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6792 else
6793 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6794
6795 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6796
6797enable_clk_gate:
6798 if (params->disable_clock_gating) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006799 hdev->asic_funcs->set_clock_gating(hdev);
Greg Kroah-Hartman65a9bde62020-07-27 11:49:37 +02006800
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006801 mutex_unlock(&gaudi->clk_gate_mutex);
6802 }
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006803
6804 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006805}
6806
6807static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6808 const char *qm_name,
6809 u64 glbl_sts_addr,
6810 u64 arb_err_addr)
6811{
6812 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6813 char reg_desc[32];
6814
6815 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6816 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6817 glbl_sts_clr_val = 0;
6818 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6819
6820 if (!glbl_sts_val)
6821 continue;
6822
6823 if (i == QMAN_STREAMS)
6824 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6825 else
6826 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6827
6828 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6829 if (glbl_sts_val & BIT(j)) {
6830 dev_err_ratelimited(hdev->dev,
6831 "%s %s. err cause: %s\n",
6832 qm_name, reg_desc,
6833 gaudi_qman_error_cause[j]);
6834 glbl_sts_clr_val |= BIT(j);
6835 }
6836 }
6837
6838		/* Write 1 to clear errors */
6839 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6840 }
6841
6842 arb_err_val = RREG32(arb_err_addr);
6843
6844 if (!arb_err_val)
6845 return;
6846
6847 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6848 if (arb_err_val & BIT(j)) {
6849 dev_err_ratelimited(hdev->dev,
6850 "%s ARB_ERR. err cause: %s\n",
6851 qm_name,
6852 gaudi_qman_arb_error_cause[j]);
6853 }
6854 }
6855}
6856
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02006857static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
6858 struct hl_eq_sm_sei_data *sei_data)
6859{
6860 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
6861
6862 switch (sei_data->sei_cause) {
6863 case GAUDI_SM_SEI_SO_OVERFLOW:
6864 dev_err(hdev->dev,
6865 "SM %u SEI Error: SO %u overflow/underflow",
6866 index, le16_to_cpu(sei_data->sei_log));
6867 break;
6868 case GAUDI_SM_SEI_LBW_4B_UNALIGNED:
6869 dev_err(hdev->dev,
6870 "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
6871 index, le16_to_cpu(sei_data->sei_log));
6872 break;
6873 case GAUDI_SM_SEI_AXI_RESPONSE_ERR:
6874 dev_err(hdev->dev,
6875 "SM %u SEI Error: AXI ID %u response error",
6876 index, le16_to_cpu(sei_data->sei_log));
6877 break;
6878 default:
6879 dev_err(hdev->dev, "Unknown SM SEI cause %u",
6880 le16_to_cpu(sei_data->sei_log));
6881 break;
6882 }
6883}
6884
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006885static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6886 struct hl_eq_ecc_data *ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006887{
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006888 struct ecc_info_extract_params params;
6889 u64 ecc_address = 0, ecc_syndrom = 0;
6890 u8 index, memory_wrapper_idx = 0;
6891 bool extract_info_from_fw;
6892 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006893
6894 switch (event_type) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006895 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
6896 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
6897 extract_info_from_fw = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006898 break;
6899 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
6900 index = event_type - GAUDI_EVENT_TPC0_SERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006901 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
6902 params.num_memories = 90;
6903 params.derr = false;
6904 params.disable_clock_gating = true;
6905 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006906 break;
6907 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
6908 index = event_type - GAUDI_EVENT_TPC0_DERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006909 params.block_address =
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006910 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006911 params.num_memories = 90;
6912 params.derr = true;
6913 params.disable_clock_gating = true;
6914 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006915 break;
6916 case GAUDI_EVENT_MME0_ACC_SERR:
6917 case GAUDI_EVENT_MME1_ACC_SERR:
6918 case GAUDI_EVENT_MME2_ACC_SERR:
6919 case GAUDI_EVENT_MME3_ACC_SERR:
6920 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006921 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6922 params.num_memories = 128;
6923 params.derr = false;
6924 params.disable_clock_gating = true;
6925 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006926 break;
6927 case GAUDI_EVENT_MME0_ACC_DERR:
6928 case GAUDI_EVENT_MME1_ACC_DERR:
6929 case GAUDI_EVENT_MME2_ACC_DERR:
6930 case GAUDI_EVENT_MME3_ACC_DERR:
6931 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006932 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6933 params.num_memories = 128;
6934 params.derr = true;
6935 params.disable_clock_gating = true;
6936 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006937 break;
6938 case GAUDI_EVENT_MME0_SBAB_SERR:
6939 case GAUDI_EVENT_MME1_SBAB_SERR:
6940 case GAUDI_EVENT_MME2_SBAB_SERR:
6941 case GAUDI_EVENT_MME3_SBAB_SERR:
6942 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006943 params.block_address =
6944 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6945 params.num_memories = 33;
6946 params.derr = false;
6947 params.disable_clock_gating = true;
6948 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006949 break;
6950 case GAUDI_EVENT_MME0_SBAB_DERR:
6951 case GAUDI_EVENT_MME1_SBAB_DERR:
6952 case GAUDI_EVENT_MME2_SBAB_DERR:
6953 case GAUDI_EVENT_MME3_SBAB_DERR:
6954 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006955 params.block_address =
6956 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6957 params.num_memories = 33;
6958 params.derr = true;
6959 params.disable_clock_gating = true;
Oded Gabbay652b4442020-11-21 14:35:35 +02006960 extract_info_from_fw = false;
6961 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006962 default:
6963 return;
6964 }
6965
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006966 if (extract_info_from_fw) {
6967 ecc_address = le64_to_cpu(ecc_data->ecc_address);
6968 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6969 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6970 } else {
6971 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
6972 &ecc_syndrom, &memory_wrapper_idx);
6973 if (rc)
6974 return;
6975 }
6976
6977 dev_err(hdev->dev,
6978 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
6979 ecc_address, ecc_syndrom, memory_wrapper_idx);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006980}
6981
6982static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
6983{
6984 u64 glbl_sts_addr, arb_err_addr;
6985 u8 index;
6986 char desc[32];
6987
6988 switch (event_type) {
6989 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
6990 index = event_type - GAUDI_EVENT_TPC0_QM;
6991 glbl_sts_addr =
6992 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
6993 arb_err_addr =
6994 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
6995 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
6996 break;
6997 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
6998 index = event_type - GAUDI_EVENT_MME0_QM;
6999 glbl_sts_addr =
7000 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
7001 arb_err_addr =
7002 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
7003 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7004 break;
7005 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7006 index = event_type - GAUDI_EVENT_DMA0_QM;
7007 glbl_sts_addr =
7008 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
7009 arb_err_addr =
7010 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
7011 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7012 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02007013 case GAUDI_EVENT_NIC0_QM0:
7014 glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
7015 arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
7016 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7017 break;
7018 case GAUDI_EVENT_NIC0_QM1:
7019 glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
7020 arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
7021 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7022 break;
7023 case GAUDI_EVENT_NIC1_QM0:
7024 glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
7025 arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
7026 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7027 break;
7028 case GAUDI_EVENT_NIC1_QM1:
7029 glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
7030 arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
7031 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7032 break;
7033 case GAUDI_EVENT_NIC2_QM0:
7034 glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
7035 arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
7036 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7037 break;
7038 case GAUDI_EVENT_NIC2_QM1:
7039 glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
7040 arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
7041 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7042 break;
7043 case GAUDI_EVENT_NIC3_QM0:
7044 glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
7045 arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
7046 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7047 break;
7048 case GAUDI_EVENT_NIC3_QM1:
7049 glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
7050 arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
7051 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7052 break;
7053 case GAUDI_EVENT_NIC4_QM0:
7054 glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
7055 arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
7056 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7057 break;
7058 case GAUDI_EVENT_NIC4_QM1:
7059 glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
7060 arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
7061 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7062 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007063 default:
7064 return;
7065 }
7066
7067 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
7068}
7069
7070static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7071 bool razwi)
7072{
Ofir Bittonebd8d122020-05-10 13:41:28 +03007073 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007074
7075 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7076 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7077 event_type, desc);
7078
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007079 if (razwi) {
7080 gaudi_print_razwi_info(hdev);
7081 gaudi_print_mmu_error_info(hdev);
7082 }
7083}
7084
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007085static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7086{
Ofir Bittonebd8d122020-05-10 13:41:28 +03007087 struct gaudi_device *gaudi = hdev->asic_specific;
7088
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007089 /* Unmask all IRQs since some could have been received
7090 * during the soft reset
7091 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03007092 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007093}
7094
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007095static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7096 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007097{
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007098 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7099 int err = 0;
7100
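	/*
	 * With FW security enabled, HBM interrupt information is taken from
	 * the ECC data the FW attached to the event entry instead of from the
	 * HBM registers read further below.
	 */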
7101 if (!hdev->asic_prop.fw_security_disabled) {
7102 if (!hbm_ecc_data) {
7103 dev_err(hdev->dev, "No FW ECC data");
7104 return 0;
7105 }
7106
7107 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7108 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7109 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7110 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7111 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7112 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7113 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7114 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7115 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7116 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7117 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7118 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7119 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7120 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7121
7122 dev_err(hdev->dev,
Oded Gabbay64a9d5a2020-11-21 14:29:25 +02007123 "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7124 device, ch, type, wr_par, rd_par, ca_par, serr, derr);
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007125
7126 err = 1;
7127
7128 return 0;
7129 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007130
7131 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7132 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7133 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7134 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7135 if (val) {
7136 err = 1;
7137 dev_err(hdev->dev,
7138 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7139 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7140 (val >> 2) & 0x1, (val >> 3) & 0x1,
7141 (val >> 4) & 0x1);
7142
7143 val2 = RREG32(base + ch * 0x1000 + 0x060);
7144 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007145 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007146 device, ch * 2,
7147 RREG32(base + ch * 0x1000 + 0x064),
7148 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7149 (val2 & 0xFF0000) >> 16,
7150 (val2 & 0xFF000000) >> 24);
7151 }
7152
7153 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7154 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7155 if (val) {
7156 err = 1;
7157 dev_err(hdev->dev,
7158 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7159 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7160 (val >> 2) & 0x1, (val >> 3) & 0x1,
7161 (val >> 4) & 0x1);
7162
7163 val2 = RREG32(base + ch * 0x1000 + 0x070);
7164 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007165 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007166 device, ch * 2 + 1,
7167 RREG32(base + ch * 0x1000 + 0x074),
7168 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7169 (val2 & 0xFF0000) >> 16,
7170 (val2 & 0xFF000000) >> 24);
7171 }
7172
7173 /* Clear interrupts */
7174 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7175 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7176 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7177 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7178 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7179 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7180 }
7181
7182 val = RREG32(base + 0x8F30);
7183 val2 = RREG32(base + 0x8F34);
7184 if (val | val2) {
7185 err = 1;
7186 dev_err(hdev->dev,
7187 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7188 device, val, val2);
7189 }
7190 val = RREG32(base + 0x8F40);
7191 val2 = RREG32(base + 0x8F44);
7192 if (val | val2) {
7193 err = 1;
7194 dev_err(hdev->dev,
7195 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7196 device, val, val2);
7197 }
7198
7199 return err;
7200}
7201
7202static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7203{
7204 switch (hbm_event_type) {
7205 case GAUDI_EVENT_HBM0_SPI_0:
7206 case GAUDI_EVENT_HBM0_SPI_1:
7207 return 0;
7208 case GAUDI_EVENT_HBM1_SPI_0:
7209 case GAUDI_EVENT_HBM1_SPI_1:
7210 return 1;
7211 case GAUDI_EVENT_HBM2_SPI_0:
7212 case GAUDI_EVENT_HBM2_SPI_1:
7213 return 2;
7214 case GAUDI_EVENT_HBM3_SPI_0:
7215 case GAUDI_EVENT_HBM3_SPI_1:
7216 return 3;
7217 default:
7218 break;
7219 }
7220
7221 /* Should never happen */
7222 return 0;
7223}
7224
7225static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7226 char *interrupt_name)
7227{
7228 struct gaudi_device *gaudi = hdev->asic_specific;
7229 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7230 bool soft_reset_required = false;
7231
7232 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
Oded Gabbay6138bbe2020-09-04 20:18:16 +03007233	 * gating, and thus cannot be done by CPU-CP; it should be done by
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007234	 * the driver instead.
7235 */
7236
7237 mutex_lock(&gaudi->clk_gate_mutex);
7238
7239 hdev->asic_funcs->disable_clock_gating(hdev);
7240
7241 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7242 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7243
7244 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7245 if (tpc_interrupts_cause & BIT(i)) {
7246 dev_err_ratelimited(hdev->dev,
7247 "TPC%d_%s interrupt cause: %s\n",
7248 tpc_id, interrupt_name,
7249 gaudi_tpc_interrupts_cause[i]);
7250 /* If this is QM error, we need to soft-reset */
7251 if (i == 15)
7252 soft_reset_required = true;
7253 }
7254
7255 /* Clear interrupts */
7256 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7257
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007258 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007259
7260 mutex_unlock(&gaudi->clk_gate_mutex);
7261
7262 return soft_reset_required;
7263}
7264
7265static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7266{
7267 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7268}
7269
7270static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7271{
7272 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7273}
7274
7275static void gaudi_print_clk_change_info(struct hl_device *hdev,
7276 u16 event_type)
7277{
7278 switch (event_type) {
7279 case GAUDI_EVENT_FIX_POWER_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007280 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007281 dev_info_ratelimited(hdev->dev,
7282 "Clock throttling due to power consumption\n");
7283 break;
7284
7285 case GAUDI_EVENT_FIX_POWER_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007286 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007287 dev_info_ratelimited(hdev->dev,
7288 "Power envelop is safe, back to optimal clock\n");
7289 break;
7290
7291 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007292 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007293 dev_info_ratelimited(hdev->dev,
7294 "Clock throttling due to overheating\n");
7295 break;
7296
7297 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007298 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007299 dev_info_ratelimited(hdev->dev,
7300 "Thermal envelop is safe, back to optimal clock\n");
7301 break;
7302
7303 default:
7304 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7305 event_type);
7306 break;
7307 }
7308}
7309
7310static void gaudi_handle_eqe(struct hl_device *hdev,
7311 struct hl_eq_entry *eq_entry)
7312{
7313 struct gaudi_device *gaudi = hdev->asic_specific;
7314 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7315 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7316 >> EQ_CTL_EVENT_TYPE_SHIFT);
7317 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03007318 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007319
7320 gaudi->events_stat[event_type]++;
7321 gaudi->events_stat_aggregate[event_type]++;
7322
7323 switch (event_type) {
7324 case GAUDI_EVENT_PCIE_CORE_DERR:
7325 case GAUDI_EVENT_PCIE_IF_DERR:
7326 case GAUDI_EVENT_PCIE_PHY_DERR:
7327 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7328 case GAUDI_EVENT_MME0_ACC_DERR:
7329 case GAUDI_EVENT_MME0_SBAB_DERR:
7330 case GAUDI_EVENT_MME1_ACC_DERR:
7331 case GAUDI_EVENT_MME1_SBAB_DERR:
7332 case GAUDI_EVENT_MME2_ACC_DERR:
7333 case GAUDI_EVENT_MME2_SBAB_DERR:
7334 case GAUDI_EVENT_MME3_ACC_DERR:
7335 case GAUDI_EVENT_MME3_SBAB_DERR:
7336 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7337 fallthrough;
7338 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7339 case GAUDI_EVENT_PSOC_MEM_DERR:
7340 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7341 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7342 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007343 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7344 case GAUDI_EVENT_MMU_DERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007345 gaudi_print_irq_info(hdev, event_type, true);
7346 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7347 if (hdev->hard_reset_on_fw_events)
7348 hl_device_reset(hdev, true, false);
7349 break;
7350
7351 case GAUDI_EVENT_GIC500:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007352 case GAUDI_EVENT_AXI_ECC:
7353 case GAUDI_EVENT_L2_RAM_ECC:
7354 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7355 gaudi_print_irq_info(hdev, event_type, false);
7356 if (hdev->hard_reset_on_fw_events)
7357 hl_device_reset(hdev, true, false);
7358 break;
7359
7360 case GAUDI_EVENT_HBM0_SPI_0:
7361 case GAUDI_EVENT_HBM1_SPI_0:
7362 case GAUDI_EVENT_HBM2_SPI_0:
7363 case GAUDI_EVENT_HBM3_SPI_0:
7364 gaudi_print_irq_info(hdev, event_type, false);
7365 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007366 gaudi_hbm_event_to_dev(event_type),
7367 &eq_entry->hbm_ecc_data);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007368 if (hdev->hard_reset_on_fw_events)
7369 hl_device_reset(hdev, true, false);
7370 break;
7371
7372 case GAUDI_EVENT_HBM0_SPI_1:
7373 case GAUDI_EVENT_HBM1_SPI_1:
7374 case GAUDI_EVENT_HBM2_SPI_1:
7375 case GAUDI_EVENT_HBM3_SPI_1:
7376 gaudi_print_irq_info(hdev, event_type, false);
7377 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007378 gaudi_hbm_event_to_dev(event_type),
7379 &eq_entry->hbm_ecc_data);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007380 break;
7381
7382 case GAUDI_EVENT_TPC0_DEC:
7383 case GAUDI_EVENT_TPC1_DEC:
7384 case GAUDI_EVENT_TPC2_DEC:
7385 case GAUDI_EVENT_TPC3_DEC:
7386 case GAUDI_EVENT_TPC4_DEC:
7387 case GAUDI_EVENT_TPC5_DEC:
7388 case GAUDI_EVENT_TPC6_DEC:
7389 case GAUDI_EVENT_TPC7_DEC:
7390 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007391 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007392 tpc_dec_event_to_tpc_id(event_type),
7393 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03007394 if (reset_required) {
7395 dev_err(hdev->dev, "hard reset required due to %s\n",
7396 gaudi_irq_map_table[event_type].name);
7397
7398 if (hdev->hard_reset_on_fw_events)
7399 hl_device_reset(hdev, true, false);
7400 } else {
7401 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03007402 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007403 break;
7404
7405 case GAUDI_EVENT_TPC0_KRN_ERR:
7406 case GAUDI_EVENT_TPC1_KRN_ERR:
7407 case GAUDI_EVENT_TPC2_KRN_ERR:
7408 case GAUDI_EVENT_TPC3_KRN_ERR:
7409 case GAUDI_EVENT_TPC4_KRN_ERR:
7410 case GAUDI_EVENT_TPC5_KRN_ERR:
7411 case GAUDI_EVENT_TPC6_KRN_ERR:
7412 case GAUDI_EVENT_TPC7_KRN_ERR:
7413 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007414 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007415 tpc_krn_event_to_tpc_id(event_type),
7416 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03007417 if (reset_required) {
7418 dev_err(hdev->dev, "hard reset required due to %s\n",
7419 gaudi_irq_map_table[event_type].name);
7420
7421 if (hdev->hard_reset_on_fw_events)
7422 hl_device_reset(hdev, true, false);
7423 } else {
7424 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03007425 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007426 break;
7427
7428 case GAUDI_EVENT_PCIE_CORE_SERR:
7429 case GAUDI_EVENT_PCIE_IF_SERR:
7430 case GAUDI_EVENT_PCIE_PHY_SERR:
7431 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7432 case GAUDI_EVENT_MME0_ACC_SERR:
7433 case GAUDI_EVENT_MME0_SBAB_SERR:
7434 case GAUDI_EVENT_MME1_ACC_SERR:
7435 case GAUDI_EVENT_MME1_SBAB_SERR:
7436 case GAUDI_EVENT_MME2_ACC_SERR:
7437 case GAUDI_EVENT_MME2_SBAB_SERR:
7438 case GAUDI_EVENT_MME3_ACC_SERR:
7439 case GAUDI_EVENT_MME3_SBAB_SERR:
7440 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7441 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7442 case GAUDI_EVENT_PSOC_MEM_SERR:
7443 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7444 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7445 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7446 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7447 fallthrough;
7448 case GAUDI_EVENT_MMU_SERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007449 gaudi_print_irq_info(hdev, event_type, true);
7450 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7451 hl_fw_unmask_irq(hdev, event_type);
7452 break;
7453
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007454 case GAUDI_EVENT_PCIE_DEC:
7455 case GAUDI_EVENT_MME0_WBC_RSP:
7456 case GAUDI_EVENT_MME0_SBAB0_RSP:
7457 case GAUDI_EVENT_MME1_WBC_RSP:
7458 case GAUDI_EVENT_MME1_SBAB0_RSP:
7459 case GAUDI_EVENT_MME2_WBC_RSP:
7460 case GAUDI_EVENT_MME2_SBAB0_RSP:
7461 case GAUDI_EVENT_MME3_WBC_RSP:
7462 case GAUDI_EVENT_MME3_SBAB0_RSP:
7463 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7464 case GAUDI_EVENT_PSOC_AXI_DEC:
7465 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7466 case GAUDI_EVENT_MMU_PAGE_FAULT:
7467 case GAUDI_EVENT_MMU_WR_PERM:
7468 case GAUDI_EVENT_RAZWI_OR_ADC:
7469 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7470 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7471 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7472 fallthrough;
Oded Gabbay3c681572020-11-02 21:10:39 +02007473 case GAUDI_EVENT_NIC0_QM0:
7474 case GAUDI_EVENT_NIC0_QM1:
7475 case GAUDI_EVENT_NIC1_QM0:
7476 case GAUDI_EVENT_NIC1_QM1:
7477 case GAUDI_EVENT_NIC2_QM0:
7478 case GAUDI_EVENT_NIC2_QM1:
7479 case GAUDI_EVENT_NIC3_QM0:
7480 case GAUDI_EVENT_NIC3_QM1:
7481 case GAUDI_EVENT_NIC4_QM0:
7482 case GAUDI_EVENT_NIC4_QM1:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007483 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7484 gaudi_print_irq_info(hdev, event_type, true);
7485 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007486 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007487 break;
7488
7489 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7490 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007491 if (hdev->hard_reset_on_fw_events)
7492 hl_device_reset(hdev, true, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007493 break;
7494
7495 case GAUDI_EVENT_TPC0_BMON_SPMU:
7496 case GAUDI_EVENT_TPC1_BMON_SPMU:
7497 case GAUDI_EVENT_TPC2_BMON_SPMU:
7498 case GAUDI_EVENT_TPC3_BMON_SPMU:
7499 case GAUDI_EVENT_TPC4_BMON_SPMU:
7500 case GAUDI_EVENT_TPC5_BMON_SPMU:
7501 case GAUDI_EVENT_TPC6_BMON_SPMU:
7502 case GAUDI_EVENT_TPC7_BMON_SPMU:
7503 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7504 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007505 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007506 break;
7507
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007508 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7509 gaudi_print_irq_info(hdev, event_type, false);
7510 gaudi_print_sm_sei_info(hdev, event_type,
7511 &eq_entry->sm_sei_data);
7512 hl_fw_unmask_irq(hdev, event_type);
7513 break;
7514
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007515 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7516 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007517 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007518 break;
7519
7520 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7521 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7522 dev_err(hdev->dev,
7523 "Received high temp H/W interrupt %d (cause %d)\n",
7524 event_type, cause);
7525 break;
7526
7527 default:
7528 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7529 event_type);
7530 break;
7531 }
7532}
7533
7534static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
7535 u32 *size)
7536{
7537 struct gaudi_device *gaudi = hdev->asic_specific;
7538
7539 if (aggregate) {
7540 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7541 return gaudi->events_stat_aggregate;
7542 }
7543
7544 *size = (u32) sizeof(gaudi->events_stat);
7545 return gaudi->events_stat;
7546}
7547
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007548static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007549 u32 flags)
7550{
7551 struct gaudi_device *gaudi = hdev->asic_specific;
7552 u32 status, timeout_usec;
7553 int rc;
7554
7555 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7556 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007557 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007558
7559 if (hdev->pldm)
7560 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7561 else
7562 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7563
7564 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03007565 WREG32(mmSTLB_INV_PS, 3);
7566 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007567 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007568
7569 rc = hl_poll_timeout(
7570 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007571 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007572 status,
7573 !status,
7574 1000,
7575 timeout_usec);
7576
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007577 WREG32(mmSTLB_INV_SET, 0);
7578
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007579 if (rc) {
7580 dev_err_ratelimited(hdev->dev,
7581 "MMU cache invalidation timeout\n");
7582 hl_device_reset(hdev, true, false);
7583 }
7584
7585 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007586}
7587
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007588static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007589 bool is_hard, u32 asid, u64 va, u64 size)
7590{
7591 struct gaudi_device *gaudi = hdev->asic_specific;
7592 u32 status, timeout_usec;
7593 u32 inv_data;
7594 u32 pi;
7595 int rc;
7596
7597 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7598 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007599 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007600
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007601 if (hdev->pldm)
7602 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7603 else
7604 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7605
7606 /*
7607 * TODO: currently invalidate entire L0 & L1 as in regular hard
7608 * invalidation. Need to apply invalidation of specific cache
7609 * lines with mask of ASID & VA & size.
7610	 * Note that L1 will be flushed entirely in any case.
7611 */
7612
7613 /* L0 & L1 invalidation */
7614 inv_data = RREG32(mmSTLB_CACHE_INV);
7615 /* PI is 8 bit */
7616 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
7617 WREG32(mmSTLB_CACHE_INV,
7618 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
7619
7620 rc = hl_poll_timeout(
7621 hdev,
7622 mmSTLB_INV_CONSUMER_INDEX,
7623 status,
7624 status == pi,
7625 1000,
7626 timeout_usec);
7627
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007628 if (rc) {
7629 dev_err_ratelimited(hdev->dev,
7630 "MMU cache invalidation timeout\n");
7631 hl_device_reset(hdev, true, false);
7632 }
7633
7634 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007635}
7636
7637static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
7638 u32 asid, u64 phys_addr)
7639{
7640 u32 status, timeout_usec;
7641 int rc;
7642
7643 if (hdev->pldm)
7644 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7645 else
7646 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7647
7648 WREG32(MMU_ASID, asid);
7649 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7650 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
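	/*
	 * Writing the busy bit presumably triggers the MMU to latch the new
	 * ASID/hop0 configuration; the poll below waits for H/W to clear it.
	 */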
7651 WREG32(MMU_BUSY, 0x80000000);
7652
7653 rc = hl_poll_timeout(
7654 hdev,
7655 MMU_BUSY,
7656 status,
7657 !(status & 0x80000000),
7658 1000,
7659 timeout_usec);
7660
7661 if (rc) {
7662 dev_err(hdev->dev,
7663 "Timeout during MMU hop0 config of asid %d\n", asid);
7664 return rc;
7665 }
7666
7667 return 0;
7668}
7669
7670static int gaudi_send_heartbeat(struct hl_device *hdev)
7671{
7672 struct gaudi_device *gaudi = hdev->asic_specific;
7673
7674 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7675 return 0;
7676
7677 return hl_fw_send_heartbeat(hdev);
7678}
7679
Oded Gabbay2f553422020-08-15 16:28:10 +03007680static int gaudi_cpucp_info_get(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007681{
7682 struct gaudi_device *gaudi = hdev->asic_specific;
7683 struct asic_fixed_properties *prop = &hdev->asic_prop;
7684 int rc;
7685
7686 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7687 return 0;
7688
Ofir Bittonedb07cb2020-12-27 17:09:09 +02007689 rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007690 if (rc)
7691 return rc;
7692
Oded Gabbay2f553422020-08-15 16:28:10 +03007693 if (!strlen(prop->cpucp_info.card_name))
7694 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007695 CARD_NAME_MAX_LEN);
7696
Oded Gabbay2f553422020-08-15 16:28:10 +03007697 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
Oded Gabbay58361aa2020-08-08 23:34:47 +03007698
Oded Gabbay2f553422020-08-15 16:28:10 +03007699 if (hdev->card_type == cpucp_card_type_pci)
Oded Gabbay58361aa2020-08-08 23:34:47 +03007700 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
Oded Gabbay2f553422020-08-15 16:28:10 +03007701 else if (hdev->card_type == cpucp_card_type_pmc)
Oded Gabbay58361aa2020-08-08 23:34:47 +03007702 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
7703
7704 hdev->max_power = prop->max_power_default;
7705
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007706 return 0;
7707}
7708
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
					struct seq_file *s)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	if (s)
		seq_puts(s,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_DMA_0 + dma_id);
		if (s)
			seq_printf(s, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_MME_0 + i);
		if (s) {
			if (!is_slave)
				seq_printf(s, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				seq_printf(s, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (s)
		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (hdev->nic_ports_mask & BIT(port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask)
				*mask |= ((u64) !is_eng_idle) <<
						(GAUDI_ENGINE_ID_NIC_0 + port);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (hdev->nic_ports_mask & BIT(port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask)
				*mask |= ((u64) !is_eng_idle) <<
						(GAUDI_ENGINE_ID_NIC_0 + port);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (s)
		seq_puts(s, "\n");

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return is_idle;
}

static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}

static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

/*
 * this function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
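/*
 * The kernel address is programmed as the TPC ICACHE base (and as a dummy
 * LUT pointer), the ICACHE is invalidated and a 64KB prefetch is triggered,
 * and only after the vector pipe reports empty is TPC_EXECUTE issued. The
 * final poll on the WQ in-flight counter waits for the kernel itself to
 * retire.
 */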
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
		1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}

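/*
 * The internal CB pool backs the small command buffers that the driver
 * itself builds for collective wait support: a coherent host buffer is
 * carved up by a gen_pool (minimum allocation = one collective CB of five
 * MSG_SHORT packets plus a FENCE packet), and the buffer is then mapped
 * into the context's device VA space so the engines can fetch from it.
 */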
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
				struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HOST_SPACE_INTERNAL_CB_SZ,
					&hdev->internal_cb_pool_dma_addr,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
			(uintptr_t) hdev->internal_cb_pool_virt_addr,
			HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&ctx->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);

	return rc;
}

static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&ctx->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);
}

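/*
 * Context init/fini hooks: the kernel context (HL_KERNEL_ASID_ID) never
 * runs user work, so only user contexts get their ASID programmed into the
 * MMU and an internal CB pool set up (and torn down again on fini).
 */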
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}

static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}

static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

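/*
 * Append a single MSG_SHORT packet to the signal CB that atomically adds 1
 * to the given sync object (W_S SOB base, ADD mode). The engine-barrier
 * (EB) bit is taken from the caller. Returns the updated CB size.
 */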
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

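/*
 * Build one MSG_SHORT packet that writes 'value' to a register in the W_S
 * monitor block at the given offset from the monitor base. Used below to
 * program a monitor's payload address and payload data.
 */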
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
				u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

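/*
 * Arm a sync manager monitor: the MSG_SHORT written to MON_ARM selects the
 * sync-object group (sob_base / 8), an 8-bit mask of objects inside that
 * group, and the target value, with mode "greater or equal". When the
 * condition is met, the monitor fires its pre-programmed payload write.
 */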
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

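/*
 * Build a FENCE packet on fence counter ID 2: the queue's CP stalls here
 * until the counter reaches the target value (1) and then decrements it by
 * 1, which is what makes the generated wait CB actually block.
 */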
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

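/*
 * Translate a queue ID into the address of that queue's CP_FENCE2_RDATA
 * register, which is where the armed monitor writes in order to release the
 * FENCE packet. DMA and TPC queues map directly to their QMAN registers;
 * for NIC queues the register is derived from the NIC macro (pair of
 * engines) and the engine within the macro.
 */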
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}

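/*
 * Program the monitor's payload: two MSG_SHORT packets set the low and high
 * halves of the fence register address and a third sets the payload data to
 * 1, so that firing the monitor releases the fence of the waiting queue.
 */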
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

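/*
 * Assemble the wait CB: three MSG_SHORTs that point the monitor's payload
 * at the target queue's fence register, one MSG_SHORT that arms the monitor
 * on the requested sync objects, and a trailing FENCE packet on which the
 * queue blocks until the monitor fires. Returns the new CB size, or 0 on an
 * invalid queue ID.
 */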
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

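/*
 * Reset a sync object back to 0. The write is done through a register
 * memset job scheduled on the SOB's queue (gaudi_schedule_register_memset)
 * rather than a direct register write, and the SOB's refcount is then
 * re-initialized.
 */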
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
	int rc;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
			CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 1, 0);
	if (rc)
		dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);

	kref_init(&hw_sob->kref);
}

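/*
 * The boot firmware reports, through a preserved PSOC register, whether the
 * host is a POWER9 machine that supports 64-bit DMA addressing; otherwise
 * the driver falls back to a 48-bit DMA mask.
 */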
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
					HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

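/*
 * Compose the 64-bit device timestamp from the two 32-bit PSOC timestamp
 * counter registers (upper word first, then lower word).
 */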
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

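/*
 * Mapping of HW blocks into user space is not supported on Gaudi, so the
 * corresponding ASIC callbacks are stubs that return -EPERM.
 */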
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}