// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

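/*
 * validate_packet_id() - return true only for opcodes that are known Gaudi
 * packet types (i.e. ones that have an entry in gaudi_packet_sizes above).
 */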
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

enum gaudi_sm_sei_cause {
	GAUDI_SM_SEI_SO_OVERFLOW,
	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
	GAUDI_SM_SEI_AXI_RESPONSE_ERR
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
				struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_schedule_register_memset(struct hl_device *hdev,
		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

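/*
 * get_collective_mode() - map a queue id to its role in collective
 * operations: external queues act as collective master, while the DMA5,
 * TPC7 and NIC queues act as collective slaves.
 */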
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

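/*
 * gaudi_get_fixed_properties() - fill hdev->asic_prop with the fixed Gaudi
 * properties: per-queue properties, DRAM/SRAM ranges, MMU layout and the
 * SOB/monitor ranges reserved for sync-stream and collective operations.
 */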
static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->max_power_default = MAX_POWER_DEFAULT_PCI;

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	/* disable fw security for now, set it in a later stage */
	prop->fw_security_disabled = true;
	prop->fw_security_status_valid = false;
	prop->hard_reset_done_by_fw = false;

	return 0;
}

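/*
 * gaudi_pci_bars_map() - map the SRAM, CFG and HBM PCI BARs and set
 * hdev->rmmio to the configuration space offset inside the CFG BAR.
 */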
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

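/*
 * gaudi_set_hbm_bar_base() - re-point inbound PCI region 2 (the HBM BAR) to
 * a new device address. Returns the previous address, or U64_MAX on failure.
 */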
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

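/*
 * gaudi_init_iatu() - configure the PCIe iATU: inbound regions for SRAM+CFG,
 * SPI flash and HBM, and a single outbound region covering host memory.
 */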
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

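/*
 * gaudi_early_init() - early probe stage: read fixed properties, validate the
 * PCI BAR sizes, bring up the PCI layer, read the preboot firmware status and
 * reset the device if its H/W state is dirty.
 */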
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = gaudi_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing with the initialization, we need to read the
	 * preboot version to determine whether we run with a security-enabled
	 * firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_disabled) {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	} else {
		rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

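/*
 * _gaudi_init_tpc_mem() - build a LIN_DMA packet that copies the TPC kernel
 * image from host memory to SRAM, submit it on QMAN0 and then run the kernel
 * on every TPC engine.
 */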
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

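/*
 * gaudi_collective_map_sobs() - assign the SOBs of the stream's current SOB
 * group to the NIC slave queues and to the DMA5/TPC7 reduction queues.
 */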
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	u64 base_addr;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob_group->base_sob_id * 4;
	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
	if (rc)
		dev_err(hdev->dev,
			"failed resetting sob group - sob base %u, count %u",
			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

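/*
 * gaudi_collective_init() - carve the reserved SOB range into per-stream SOB
 * groups, map them to the collective queues and build the master monitor
 * masks of the enabled NIC engines and the reduction engine.
 */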
static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	prop->mstr_sob_mask[0] = 0;
	master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[0] |= BIT(i);

	prop->mstr_sob_mask[1] = 0;
	master_monitor_sobs =
		NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs; i++) {
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[1] |= BIT(i);
	}

	/* Set collective engine bit */
	prop->mstr_sob_mask[1] |= BIT(i);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

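/*
 * gaudi_collective_master_init_job() - patch the master job's CB with two
 * monitor-based waits: one on the first group of slave SOBs and one on the
 * remaining slave SOBs plus the reduction engine SOB.
 */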
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

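/*
 * gaudi_collective_slave_init_job() - fill a slave job's CB with a wait on
 * the signal SOB followed by a signal of this queue's collective SOB.
 */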
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

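/*
 * gaudi_collective_wait_init_cs() - initialize all jobs of a collective wait
 * CS: copy the signal SOB info, init the master/slave CBs and advance the
 * stream's SOB group value, handling wraparound.
 */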
static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);
	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;
}

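/*
 * gaudi_collective_wait_create_job() - allocate a job and a kernel CB sized
 * for either a collective master or a collective slave queue, and add it to
 * the CS job list.
 */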
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/*
	 * No need for parsing, the user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

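/*
 * gaudi_collective_wait_create_jobs() - validate the master queue and the
 * reduction engine, then create the master job plus one slave job per
 * enabled NIC engine and one for the reduction engine (DMA5 or TPC7).
 */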
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
		}

		if (rc)
			return rc;
	}

	return rc;
}

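/*
 * gaudi_late_init() - late init stage: get CPU-CP info, disable unused NIC
 * QMANs on PCI cards, enable PCI access from the device CPU, fetch the PSOC
 * frequency, clear the MMU page tables, load the TPC kernel and initialize
 * the collective properties.
 */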
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

1436{
1437 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1438 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1439 int i, j, rc = 0;
1440
1441 /*
1442 * The device CPU works with 40-bits addresses, while bit 39 must be set
1443 * to '1' when accessing the host.
1444 * Bits 49:39 of the full host address are saved for a later
1445 * configuration of the HW to perform extension to 50 bits.
1446 * Because there is a single HW register that holds the extension bits,
1447 * these bits must be identical in all allocated range.
1448 */
1449
1450 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1451 virt_addr_arr[i] =
1452 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1453 HL_CPU_ACCESSIBLE_MEM_SIZE,
1454 &dma_addr_arr[i],
1455 GFP_KERNEL | __GFP_ZERO);
1456 if (!virt_addr_arr[i]) {
1457 rc = -ENOMEM;
1458 goto free_dma_mem_arr;
1459 }
1460
1461 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1462 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1463 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1464 break;
1465 }
1466
1467 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1468 dev_err(hdev->dev,
1469 "MSB of CPU accessible DMA memory are not identical in all range\n");
1470 rc = -EFAULT;
1471 goto free_dma_mem_arr;
1472 }
1473
1474 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1475 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1476 hdev->cpu_pci_msb_addr =
1477 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1478
Ofir Bittonc692dec2020-10-04 17:34:37 +03001479 if (hdev->asic_prop.fw_security_disabled)
1480 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001481
1482free_dma_mem_arr:
1483 for (j = 0 ; j < i ; j++)
1484 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1485 HL_CPU_ACCESSIBLE_MEM_SIZE,
1486 virt_addr_arr[j],
1487 dma_addr_arr[j]);
1488
1489 return rc;
1490}
1491
1492static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1493{
1494 struct gaudi_device *gaudi = hdev->asic_specific;
1495 struct gaudi_internal_qman_info *q;
1496 u32 i;
1497
1498 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1499 q = &gaudi->internal_qmans[i];
1500 if (!q->pq_kernel_addr)
1501 continue;
1502 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1503 q->pq_kernel_addr,
1504 q->pq_dma_addr);
1505 }
1506}
1507
1508static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1509{
1510 struct gaudi_device *gaudi = hdev->asic_specific;
1511 struct gaudi_internal_qman_info *q;
1512 int rc, i;
1513
1514 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1515 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1516 continue;
1517
1518 q = &gaudi->internal_qmans[i];
1519
1520 switch (i) {
Ofir Bitton0940cab2020-08-31 08:52:56 +03001521 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001522 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1523 break;
1524 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1525 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1526 break;
1527 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1528 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1529 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02001530 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1531 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1532 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001533 default:
1534 dev_err(hdev->dev, "Bad internal queue index %d", i);
1535 rc = -EINVAL;
1536 goto free_internal_qmans_pq_mem;
1537 }
1538
1539 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1540 hdev, q->pq_size,
1541 &q->pq_dma_addr,
1542 GFP_KERNEL | __GFP_ZERO);
1543 if (!q->pq_kernel_addr) {
1544 rc = -ENOMEM;
1545 goto free_internal_qmans_pq_mem;
1546 }
1547 }
1548
1549 return 0;
1550
1551free_internal_qmans_pq_mem:
1552 gaudi_free_internal_qmans_pq_mem(hdev);
1553 return rc;
1554}
1555
1556static int gaudi_sw_init(struct hl_device *hdev)
1557{
1558 struct gaudi_device *gaudi;
Ofir Bittonebd8d122020-05-10 13:41:28 +03001559 u32 i, event_id = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001560 int rc;
1561
1562 /* Allocate device structure */
1563 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1564 if (!gaudi)
1565 return -ENOMEM;
1566
Ofir Bittonebd8d122020-05-10 13:41:28 +03001567 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1568 if (gaudi_irq_map_table[i].valid) {
1569 if (event_id == GAUDI_EVENT_SIZE) {
1570 dev_err(hdev->dev,
1571 "Event array exceeds the limit of %u events\n",
1572 GAUDI_EVENT_SIZE);
1573 rc = -EINVAL;
1574 goto free_gaudi_device;
1575 }
1576
1577 gaudi->events[event_id++] =
1578 gaudi_irq_map_table[i].fc_id;
1579 }
1580 }
1581
Oded Gabbay2f553422020-08-15 16:28:10 +03001582 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001583
1584 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1585
1586 hdev->asic_specific = gaudi;
1587
1588 /* Create DMA pool for small allocations */
1589 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1590 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1591 if (!hdev->dma_pool) {
1592 dev_err(hdev->dev, "failed to create DMA pool\n");
1593 rc = -ENOMEM;
1594 goto free_gaudi_device;
1595 }
1596
1597 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1598 if (rc)
1599 goto free_dma_pool;
1600
1601 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1602 if (!hdev->cpu_accessible_dma_pool) {
1603 dev_err(hdev->dev,
1604 "Failed to create CPU accessible DMA pool\n");
1605 rc = -ENOMEM;
1606 goto free_cpu_dma_mem;
1607 }
1608
1609 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1610 (uintptr_t) hdev->cpu_accessible_dma_mem,
1611 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1612 if (rc) {
1613 dev_err(hdev->dev,
1614 "Failed to add memory to CPU accessible DMA pool\n");
1615 rc = -EFAULT;
1616 goto free_cpu_accessible_dma_pool;
1617 }
1618
1619 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1620 if (rc)
1621 goto free_cpu_accessible_dma_pool;
1622
1623 spin_lock_init(&gaudi->hw_queues_lock);
1624 mutex_init(&gaudi->clk_gate_mutex);
1625
1626 hdev->supports_sync_stream = true;
1627 hdev->supports_coresight = true;
1628
1629 return 0;
1630
1631free_cpu_accessible_dma_pool:
1632 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1633free_cpu_dma_mem:
Ofir Bittonc692dec2020-10-04 17:34:37 +03001634 if (hdev->asic_prop.fw_security_disabled)
1635 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1636 hdev->cpu_pci_msb_addr);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001637 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1638 HL_CPU_ACCESSIBLE_MEM_SIZE,
1639 hdev->cpu_accessible_dma_mem,
1640 hdev->cpu_accessible_dma_address);
1641free_dma_pool:
1642 dma_pool_destroy(hdev->dma_pool);
1643free_gaudi_device:
1644 kfree(gaudi);
1645 return rc;
1646}
1647
1648static int gaudi_sw_fini(struct hl_device *hdev)
1649{
1650 struct gaudi_device *gaudi = hdev->asic_specific;
1651
1652 gaudi_free_internal_qmans_pq_mem(hdev);
1653
1654 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1655
Ofir Bittonc692dec2020-10-04 17:34:37 +03001656 if (hdev->asic_prop.fw_security_disabled)
1657 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001658 hdev->cpu_pci_msb_addr);
Ofir Bittonc692dec2020-10-04 17:34:37 +03001659
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001660 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1661 HL_CPU_ACCESSIBLE_MEM_SIZE,
1662 hdev->cpu_accessible_dma_mem,
1663 hdev->cpu_accessible_dma_address);
1664
1665 dma_pool_destroy(hdev->dma_pool);
1666
1667 mutex_destroy(&gaudi->clk_gate_mutex);
1668
1669 kfree(gaudi);
1670
1671 return 0;
1672}
1673
1674static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1675{
1676 struct hl_device *hdev = arg;
1677 int i;
1678
1679 if (hdev->disabled)
1680 return IRQ_HANDLED;
1681
1682 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1683 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1684
1685 hl_irq_handler_eq(irq, &hdev->event_queue);
1686
1687 return IRQ_HANDLED;
1688}
1689
1690/*
1691 * For backward compatibility, new MSI interrupts should be set after the
1692 * existing CPU and NIC interrupts.
1693 */
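/*
 * Editorial sketch of the resulting mapping, derived from the code
 * below (illustration only):
 *   nr < GAUDI_EVENT_QUEUE_MSI_IDX, or cpu_eq  -> MSI vector nr
 *   otherwise                                  -> MSI vector nr + NIC_NUMBER_OF_ENGINES + 1
 */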
1694static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1695 bool cpu_eq)
1696{
1697 int msi_vec;
1698
1699 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1700 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1701 GAUDI_EVENT_QUEUE_MSI_IDX);
1702
1703 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1704 (nr + NIC_NUMBER_OF_ENGINES + 1);
1705
1706 return pci_irq_vector(hdev->pdev, msi_vec);
1707}
1708
1709static int gaudi_enable_msi_single(struct hl_device *hdev)
1710{
1711 int rc, irq;
1712
Oded Gabbay3b82c342020-11-27 18:10:20 +02001713 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001714
1715 irq = gaudi_pci_irq_vector(hdev, 0, false);
1716 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1717 "gaudi single msi", hdev);
1718 if (rc)
1719 dev_err(hdev->dev,
1720 "Failed to request single MSI IRQ\n");
1721
1722 return rc;
1723}
1724
1725static int gaudi_enable_msi_multi(struct hl_device *hdev)
1726{
1727 int cq_cnt = hdev->asic_prop.completion_queues_count;
1728 int rc, i, irq_cnt_init, irq;
1729
1730 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1731 irq = gaudi_pci_irq_vector(hdev, i, false);
1732 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1733 &hdev->completion_queue[i]);
1734 if (rc) {
1735 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1736 goto free_irqs;
1737 }
1738 }
1739
1740 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1741 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1742 &hdev->event_queue);
1743 if (rc) {
1744 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1745 goto free_irqs;
1746 }
1747
1748 return 0;
1749
1750free_irqs:
1751 for (i = 0 ; i < irq_cnt_init ; i++)
1752 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1753 &hdev->completion_queue[i]);
1754 return rc;
1755}
1756
1757static int gaudi_enable_msi(struct hl_device *hdev)
1758{
1759 struct gaudi_device *gaudi = hdev->asic_specific;
1760 int rc;
1761
1762 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1763 return 0;
1764
1765 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1766 PCI_IRQ_MSI);
1767 if (rc < 0) {
1768 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1769 return rc;
1770 }
1771
1772 if (rc < NUMBER_OF_INTERRUPTS) {
1773 gaudi->multi_msi_mode = false;
1774 rc = gaudi_enable_msi_single(hdev);
1775 } else {
1776 gaudi->multi_msi_mode = true;
1777 rc = gaudi_enable_msi_multi(hdev);
1778 }
1779
1780 if (rc)
1781 goto free_pci_irq_vectors;
1782
1783 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1784
1785 return 0;
1786
1787free_pci_irq_vectors:
1788 pci_free_irq_vectors(hdev->pdev);
1789 return rc;
1790}
1791
1792static void gaudi_sync_irqs(struct hl_device *hdev)
1793{
1794 struct gaudi_device *gaudi = hdev->asic_specific;
1795 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1796
1797 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1798 return;
1799
1800 /* Wait for all pending IRQs to be finished */
1801 if (gaudi->multi_msi_mode) {
1802 for (i = 0 ; i < cq_cnt ; i++)
1803 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1804
1805 synchronize_irq(gaudi_pci_irq_vector(hdev,
1806 GAUDI_EVENT_QUEUE_MSI_IDX,
1807 true));
1808 } else {
1809 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1810 }
1811}
1812
1813static void gaudi_disable_msi(struct hl_device *hdev)
1814{
1815 struct gaudi_device *gaudi = hdev->asic_specific;
1816 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1817
1818 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1819 return;
1820
1821 gaudi_sync_irqs(hdev);
1822
1823 if (gaudi->multi_msi_mode) {
1824 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1825 true);
1826 free_irq(irq, &hdev->event_queue);
1827
1828 for (i = 0 ; i < cq_cnt ; i++) {
1829 irq = gaudi_pci_irq_vector(hdev, i, false);
1830 free_irq(irq, &hdev->completion_queue[i]);
1831 }
1832 } else {
1833 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1834 }
1835
1836 pci_free_irq_vectors(hdev->pdev);
1837
1838 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1839}
1840
1841static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1842{
1843 struct gaudi_device *gaudi = hdev->asic_specific;
1844
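/*
 * Editorial note: the early returns below mean the driver programs the
 * SRAM scrambler only when FW security is disabled, the firmware did
 * not already enable it, it was not configured earlier, and the
 * driver-level enable flag is set (a summary of the checks, not new
 * behavior).
 */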
Ofir Bittonc692dec2020-10-04 17:34:37 +03001845 if (!hdev->asic_prop.fw_security_disabled)
1846 return;
1847
1848 if (hdev->asic_prop.fw_security_status_valid &&
1849 (hdev->asic_prop.fw_app_security_map &
1850 CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
1851 return;
1852
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001853 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1854 return;
1855
1856 if (!hdev->sram_scrambler_enable)
1857 return;
1858
1859 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1860 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1861 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1862 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1863 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1864 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1865 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1866 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1867 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1868 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1869 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1870 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1871 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1872 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1873 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1874 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1875
1876 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1877 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1878 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1879 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1880 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1881 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1882 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1883 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1884 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1885 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1886 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1887 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1888 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1889 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1890 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1891 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1892
1893 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1894 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1895 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1896 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1897 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1898 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1899 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1900 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1901 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1902 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1903 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1904 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1905 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1906 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1907 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1908 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1909
1910 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1911}
1912
1913static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1914{
1915 struct gaudi_device *gaudi = hdev->asic_specific;
1916
Ofir Bittonc692dec2020-10-04 17:34:37 +03001917 if (!hdev->asic_prop.fw_security_disabled)
1918 return;
1919
1920 if (hdev->asic_prop.fw_security_status_valid &&
1921 (hdev->asic_prop.fw_boot_cpu_security_map &
1922 CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
1923 return;
1924
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001925 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1926 return;
1927
1928 if (!hdev->dram_scrambler_enable)
1929 return;
1930
1931 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1932 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1933 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1934 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1935 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1936 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1937 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1938 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1939 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1940 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1941 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1942 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1943 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1944 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1945 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1946 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1947
1948 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1949 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1950 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1951 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1952 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1953 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1954 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1955 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1956 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1957 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1958 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1959 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1960 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1961 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1962 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1963 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1964
1965 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1966 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1967 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1968 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1969 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1970 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1971 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1972 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1973 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1974 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1975 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1976 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1977 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1978 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1979 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1980 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1981
1982 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1983}
1984
1985static void gaudi_init_e2e(struct hl_device *hdev)
1986{
Ofir Bittonc692dec2020-10-04 17:34:37 +03001987 if (!hdev->asic_prop.fw_security_disabled)
1988 return;
1989
1990 if (hdev->asic_prop.fw_security_status_valid &&
1991 (hdev->asic_prop.fw_boot_cpu_security_map &
1992 CPU_BOOT_DEV_STS0_E2E_CRED_EN))
1993 return;
1994
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001995 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1996 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1997 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1998 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1999
2000 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2001 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2002 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2003 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2004
2005 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2006 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2007 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2008 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2009
2010 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2011 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2012 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2013 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2014
2015 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2016 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2017 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2018 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2019
2020 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2021 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2022 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2023 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2024
2025 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2026 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2027 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2028 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2029
2030 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2031 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2032 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2033 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2034
2035 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2036 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2037 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2038 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2039
2040 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2041 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2042 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2043 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2044
2045 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2046 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2047 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2048 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2049
2050 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2051 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2052 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2053 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2054
2055 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2056 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2057 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2058 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2059
2060 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2061 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2062 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2063 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2064
2065 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2066 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2067 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2068 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2069
2070 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2071 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2072 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2073 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2074
2075 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2076 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2077 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2078 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2079
2080 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2081 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2082 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2083 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2084
2085 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2086 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2087 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2088 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2089
2090 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2091 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2092 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2093 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2094
2095 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2096 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2097 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2098 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2099
2100 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2101 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2102 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2103 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2104
2105 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2106 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2107 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2108 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2109
2110 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2111 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2112 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2113 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2114
2115 if (!hdev->dram_scrambler_enable) {
2116 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2117 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2118 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2119 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2120
2121 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2122 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2123 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2124 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2125
2126 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2127 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2128 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2129 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2130
2131 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2132 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2133 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2134 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2135
2136 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2137 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2138 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2139 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2140
2141 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2142 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2143 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2144 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2145
2146 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2147 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2148 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2149 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2150
2151 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2152 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2153 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2154 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2155
2156 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2157 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2158 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2159 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2160
2161 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2162 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2163 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2164 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2165
2166 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2167 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2168 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2169 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2170
2171 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2172 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2173 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2174 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2175
2176 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2177 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2178 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2179 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2180
2181 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2182 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2183 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2184 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2185
2186 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2187 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2188 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2189 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2190
2191 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2192 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2193 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2194 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2195
2196 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2197 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2198 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2199 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2200
2201 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2202 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2203 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2204 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2205
2206 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2207 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2208 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2209 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2210
2211 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2212 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2213 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2214 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2215
2216 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2217 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2218 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2219 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2220
2221 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2222 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2223 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2224 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2225
2226 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2227 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2228 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2229 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2230
2231 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2232 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2233 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2234 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2235 }
2236
2237 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2238 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2239 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2240 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2241
2242 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2243 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2244 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2245 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2246
2247 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2248 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2249 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2250 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2251
2252 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2253 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2254 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2255 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2256
2257 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2258 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2259 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2260 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2261
2262 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2263 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2264 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2265 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2266
2267 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2268 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2269 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2270 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2271
2272 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2273 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2274 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2275 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2276
2277 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2278 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2279 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2280 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2281
2282 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2283 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2284 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2285 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2286
2287 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2288 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2289 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2290 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2291
2292 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2293 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2294 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2295 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2296
2297 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2298 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2299 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2300 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2301
2302 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2303 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2304 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2305 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2306
2307 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2308 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2309 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2310 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2311
2312 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2313 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2314 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2315 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2316
2317 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2318 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2319 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2320 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2321
2322 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2323 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2324 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2325 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2326
2327 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2328 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2329 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2330 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2331
2332 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2333 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2334 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2335 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2336
2337 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2338 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2339 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2340 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2341
2342 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2343 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2344 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2345 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2346
2347 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2348 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2349 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2350 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2351
2352 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2353 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2354 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2355 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2356}
2357
2358static void gaudi_init_hbm_cred(struct hl_device *hdev)
2359{
2360 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2361
Ofir Bittonc692dec2020-10-04 17:34:37 +03002362 if (!hdev->asic_prop.fw_security_disabled)
2363 return;
2364
2365 if (hdev->asic_prop.fw_security_status_valid &&
2366 (hdev->asic_prop.fw_boot_cpu_security_map &
2367 CPU_BOOT_DEV_STS0_HBM_CRED_EN))
2368 return;
2369
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002370 hbm0_wr = 0x33333333;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002371 hbm0_rd = 0x77777777;
Oded Gabbay0b168c82020-06-15 19:25:57 +03002372 hbm1_wr = 0x55555555;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002373 hbm1_rd = 0xDDDDDDDD;
2374
2375 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2376 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2377 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2378 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2379
2380 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2381 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2382 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2383 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2384
2385 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2386 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2387 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2388 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2389
2390 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2391 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2392 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2393 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2394
2395 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2396 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2397 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2398 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2399 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2400 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2401 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2402 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2403 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2404 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2405 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2406 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2407
2408 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2409 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2410 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2411 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2412 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2413 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2414 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2415 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2416 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2417 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2418 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2419 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2420}
2421
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002422static void gaudi_init_golden_registers(struct hl_device *hdev)
2423{
2424 u32 tpc_offset;
2425 int tpc_id, i;
2426
2427 gaudi_init_e2e(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002428 gaudi_init_hbm_cred(hdev);
2429
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002430 for (tpc_id = 0, tpc_offset = 0;
2431 tpc_id < TPC_NUMBER_OF_ENGINES;
2432 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2433 /* Mask all arithmetic interrupts from TPC */
2434 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2435 /* Set 16 cache lines */
2436 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2437 ICACHE_FETCH_LINE_NUM, 2);
2438 }
2439
2440 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2441 for (i = 0 ; i < 128 ; i += 8)
2442 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2443
2444 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2445 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2446 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2447 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002448}
2449
2450static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2451 int qman_id, dma_addr_t qman_pq_addr)
2452{
2453 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2454 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2455 u32 q_off, dma_qm_offset;
2456 u32 dma_qm_err_cfg;
2457
2458 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2459
2460 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2461 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2462 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2463 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2464 so_base_en_lo = lower_32_bits(CFG_BASE +
2465 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2466 so_base_en_hi = upper_32_bits(CFG_BASE +
2467 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2468 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2469 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2470 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2471 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2472 so_base_ws_lo = lower_32_bits(CFG_BASE +
2473 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2474 so_base_ws_hi = upper_32_bits(CFG_BASE +
2475 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2476
2477 q_off = dma_qm_offset + qman_id * 4;
2478
2479 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2480 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2481
2482 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2483 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2484 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2485
Ofir Bitton25121d92020-09-24 08:22:58 +03002486 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2487 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2488 QMAN_LDMA_SRC_OFFSET);
2489 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2490 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002491
2492 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2493 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2494 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2495 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2496 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2497 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2498 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2499 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2500
Omer Shpigelmance043262020-06-16 17:56:27 +03002501 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2502
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002503 /* The following configuration is needed only once per QMAN */
2504 if (qman_id == 0) {
2505 /* Configure RAZWI IRQ */
2506 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2507 if (hdev->stop_on_err) {
2508 dma_qm_err_cfg |=
2509 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2510 }
2511
2512 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2513 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2514 lower_32_bits(CFG_BASE +
2515 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2516 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2517 upper_32_bits(CFG_BASE +
2518 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2519 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2520 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2521 dma_id);
2522
2523 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2524 QM_ARB_ERR_MSG_EN_MASK);
2525
2526 /* Increase ARB WDT to support streams architecture */
2527 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2528 GAUDI_ARB_WDT_TIMEOUT);
2529
2530 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2531 QMAN_EXTERNAL_MAKE_TRUSTED);
2532
2533 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2534 }
2535}
2536
2537static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2538{
2539 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2540 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2541
2542 /* Set to maximum possible according to physical size */
2543 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2544 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2545
Oded Gabbayd1f36332020-09-14 09:26:54 +03002546 /* WA for H/W bug H3-2116 */
2547 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2548
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002549 /* STOP_ON bit implies no completion of the operation in case of RAZWI */
2550 if (hdev->stop_on_err)
2551 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2552
2553 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2554 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2555 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2556 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2557 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2558 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2559 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2560 WREG32(mmDMA0_CORE_PROT + dma_offset,
2561 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2562 /* If the channel is secured, it should be in MMU bypass mode */
2563 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2564 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2565 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2566}
2567
2568static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2569 u32 enable_mask)
2570{
2571 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2572
2573 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2574}
2575
2576static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2577{
2578 struct gaudi_device *gaudi = hdev->asic_specific;
2579 struct hl_hw_queue *q;
2580 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2581
2582 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2583 return;
2584
2585 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2586 dma_id = gaudi_dma_assignment[i];
2587 /*
2588 * Queues that come after the CPU queue need to add 1 to get the
2589 * correct queue index. In addition, the CPU EQ and NIC IRQs must be
2590 * added in order to get the correct MSI register.
2591 */
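/*
 * Editorial illustration, derived from the code below: a PCI DMA
 * channel with dma_id 0 or 1 uses kernel queues 4 * dma_id + j, while
 * a channel with dma_id > 1 is shifted by one queue slot (to skip the
 * CPU queue) and by NIC_NUMBER_OF_ENGINES + 1 MSI vectors.
 */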
2592 if (dma_id > 1) {
2593 cpu_skip = 1;
2594 nic_skip = NIC_NUMBER_OF_ENGINES;
2595 } else {
2596 cpu_skip = 0;
2597 nic_skip = 0;
2598 }
2599
2600 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2601 q_idx = 4 * dma_id + j + cpu_skip;
2602 q = &hdev->kernel_queues[q_idx];
2603 q->cq_id = cq_id++;
2604 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2605 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2606 q->bus_address);
2607 }
2608
2609 gaudi_init_dma_core(hdev, dma_id);
2610
2611 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2612 }
2613
2614 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2615}
2616
2617static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2618 int qman_id, u64 qman_base_addr)
2619{
Ofir Bitton5de406c2020-09-10 10:56:26 +03002620 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2621 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002622 u32 q_off, dma_qm_offset;
2623 u32 dma_qm_err_cfg;
2624
2625 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2626
Ofir Bitton5de406c2020-09-10 10:56:26 +03002627 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2628 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2629 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002630 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002631 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002632 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002633 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002634 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002635 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2636 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2637 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2638 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2639 so_base_ws_lo = lower_32_bits(CFG_BASE +
2640 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2641 so_base_ws_hi = upper_32_bits(CFG_BASE +
2642 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002643
2644 q_off = dma_qm_offset + qman_id * 4;
2645
2646 if (qman_id < 4) {
2647 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2648 lower_32_bits(qman_base_addr));
2649 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2650 upper_32_bits(qman_base_addr));
2651
2652 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2653 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2654 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2655
Ofir Bitton25121d92020-09-24 08:22:58 +03002656 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2657 QMAN_CPDMA_SIZE_OFFSET);
2658 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2659 QMAN_CPDMA_SRC_OFFSET);
2660 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2661 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002662 } else {
Ofir Bitton25121d92020-09-24 08:22:58 +03002663 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2664 QMAN_LDMA_SIZE_OFFSET);
2665 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2666 QMAN_LDMA_SRC_OFFSET);
2667 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
Oded Gabbay5b94d6e2020-09-25 20:14:15 +03002668 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002669
2670 /* Configure RAZWI IRQ */
2671 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2672 if (hdev->stop_on_err) {
2673 dma_qm_err_cfg |=
2674 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2675 }
2676 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2677
2678 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2679 lower_32_bits(CFG_BASE +
2680 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2681 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2682 upper_32_bits(CFG_BASE +
2683 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2684 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2685 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2686 dma_id);
2687
2688 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2689 QM_ARB_ERR_MSG_EN_MASK);
2690
2691 /* Increase ARB WDT to support streams architecture */
2692 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2693 GAUDI_ARB_WDT_TIMEOUT);
2694
2695 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2696 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2697 QMAN_INTERNAL_MAKE_TRUSTED);
2698 }
2699
Ofir Bitton5de406c2020-09-10 10:56:26 +03002700 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2701 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2702 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2703 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2704
2705 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2706 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2707 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2708 mtr_base_ws_lo);
2709 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2710 mtr_base_ws_hi);
2711 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2712 so_base_ws_lo);
2713 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2714 so_base_ws_hi);
2715 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002716}
2717
2718static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2719{
2720 struct gaudi_device *gaudi = hdev->asic_specific;
2721 struct gaudi_internal_qman_info *q;
2722 u64 qman_base_addr;
2723 int i, j, dma_id, internal_q_index;
2724
2725 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2726 return;
2727
2728 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2729 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2730
2731 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2732 /*
2733 * Add the CPU queue in order to get the correct queue
2734 * number, as all internal queues are placed after it
2735 */
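/*
 * Editorial illustration (assuming QMAN_STREAMS is 4 and the CPU queue
 * occupies the slot right after the two PCI DMA channels): the first
 * HBM DMA channel, dma_id 2, maps its four streams to internal queue
 * indices 9..12, i.e. GAUDI_QUEUE_ID_DMA_2_0..GAUDI_QUEUE_ID_DMA_2_3.
 */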
2736 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2737
2738 q = &gaudi->internal_qmans[internal_q_index];
2739 qman_base_addr = (u64) q->pq_dma_addr;
2740 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2741 qman_base_addr);
2742 }
2743
2744 /* Initializing lower CP for HBM DMA QMAN */
2745 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2746
2747 gaudi_init_dma_core(hdev, dma_id);
2748
2749 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2750 }
2751
2752 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2753}
2754
2755static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2756 int qman_id, u64 qman_base_addr)
2757{
2758 u32 mtr_base_lo, mtr_base_hi;
2759 u32 so_base_lo, so_base_hi;
2760 u32 q_off, mme_id;
2761 u32 mme_qm_err_cfg;
2762
2763 mtr_base_lo = lower_32_bits(CFG_BASE +
2764 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2765 mtr_base_hi = upper_32_bits(CFG_BASE +
2766 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2767 so_base_lo = lower_32_bits(CFG_BASE +
2768 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2769 so_base_hi = upper_32_bits(CFG_BASE +
2770 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2771
2772 q_off = mme_offset + qman_id * 4;
2773
2774 if (qman_id < 4) {
2775 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2776 lower_32_bits(qman_base_addr));
2777 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2778 upper_32_bits(qman_base_addr));
2779
2780 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2781 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2782 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2783
Ofir Bitton25121d92020-09-24 08:22:58 +03002784 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2785 QMAN_CPDMA_SIZE_OFFSET);
2786 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2787 QMAN_CPDMA_SRC_OFFSET);
2788 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2789 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002790 } else {
Ofir Bitton25121d92020-09-24 08:22:58 +03002791 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2792 QMAN_LDMA_SIZE_OFFSET);
2793 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2794 QMAN_LDMA_SRC_OFFSET);
2795 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2796 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002797
2798 /* Configure RAZWI IRQ */
2799 mme_id = mme_offset /
2800 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2801
2802 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2803 if (hdev->stop_on_err) {
2804 mme_qm_err_cfg |=
2805 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2806 }
2807 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2808 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2809 lower_32_bits(CFG_BASE +
2810 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2811 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2812 upper_32_bits(CFG_BASE +
2813 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2814 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2815 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2816 mme_id);
2817
2818 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2819 QM_ARB_ERR_MSG_EN_MASK);
2820
2821 /* Increase ARB WDT to support streams architecture */
2822 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2823 GAUDI_ARB_WDT_TIMEOUT);
2824
2825 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2826 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2827 QMAN_INTERNAL_MAKE_TRUSTED);
2828 }
2829
2830 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2831 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2832 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2833 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2834}
2835
2836static void gaudi_init_mme_qmans(struct hl_device *hdev)
2837{
2838 struct gaudi_device *gaudi = hdev->asic_specific;
2839 struct gaudi_internal_qman_info *q;
2840 u64 qman_base_addr;
2841 u32 mme_offset;
2842 int i, internal_q_index;
2843
2844 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2845 return;
2846
2847 /*
2848 * Map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2849 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2850 */
2851
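/*
 * Editorial note, derived from the loop below: iterations 0-3 program
 * streams 0-3 of the N_W MME QMAN; when i == 3 the offset is reset to
 * 0 so iterations 4-7 program the S_W MME QMAN, and the lower CPs of
 * both QMANs are initialized right after the loop.
 */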
2852 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2853
2854 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2855 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2856 q = &gaudi->internal_qmans[internal_q_index];
2857 qman_base_addr = (u64) q->pq_dma_addr;
2858 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2859 qman_base_addr);
2860 if (i == 3)
2861 mme_offset = 0;
2862 }
2863
2864 /* Initializing lower CP for MME QMANs */
2865 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2866 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2867 gaudi_init_mme_qman(hdev, 0, 4, 0);
2868
2869 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2870 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2871
2872 gaudi->hw_cap_initialized |= HW_CAP_MME;
2873}
2874
2875static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2876 int qman_id, u64 qman_base_addr)
2877{
Ofir Bitton5de406c2020-09-10 10:56:26 +03002878 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2879 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002880 u32 q_off, tpc_id;
2881 u32 tpc_qm_err_cfg;
2882
Ofir Bitton5de406c2020-09-10 10:56:26 +03002883 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2884 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2885 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002886 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002887 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002888 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002889 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002890 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002891 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2892 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2893 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2894 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2895 so_base_ws_lo = lower_32_bits(CFG_BASE +
2896 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2897 so_base_ws_hi = upper_32_bits(CFG_BASE +
2898 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002899
2900 q_off = tpc_offset + qman_id * 4;
2901
Ofir Bitton5de406c2020-09-10 10:56:26 +03002902 tpc_id = tpc_offset /
2903 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2904
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002905 if (qman_id < 4) {
2906 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2907 lower_32_bits(qman_base_addr));
2908 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2909 upper_32_bits(qman_base_addr));
2910
2911 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2912 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2913 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2914
Ofir Bitton25121d92020-09-24 08:22:58 +03002915 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2916 QMAN_CPDMA_SIZE_OFFSET);
2917 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2918 QMAN_CPDMA_SRC_OFFSET);
2919 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2920 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002921 } else {
Ofir Bitton25121d92020-09-24 08:22:58 +03002922 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2923 QMAN_LDMA_SIZE_OFFSET);
2924 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2925 QMAN_LDMA_SRC_OFFSET);
2926 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2927 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002928
2929 /* Configure RAZWI IRQ */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002930 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2931 if (hdev->stop_on_err) {
2932 tpc_qm_err_cfg |=
2933 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2934 }
2935
2936 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2937 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2938 lower_32_bits(CFG_BASE +
2939 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2940 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2941 upper_32_bits(CFG_BASE +
2942 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2943 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2944 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2945 tpc_id);
2946
2947 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2948 QM_ARB_ERR_MSG_EN_MASK);
2949
2950 /* Increase ARB WDT to support streams architecture */
2951 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2952 GAUDI_ARB_WDT_TIMEOUT);
2953
2954 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2955 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2956 QMAN_INTERNAL_MAKE_TRUSTED);
2957 }
2958
Ofir Bitton5de406c2020-09-10 10:56:26 +03002959 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2960 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2961 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2962 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2963
2964 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
2965 if (tpc_id == 6) {
2966 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2967 mtr_base_ws_lo);
2968 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2969 mtr_base_ws_hi);
2970 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2971 so_base_ws_lo);
2972 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2973 so_base_ws_hi);
2974 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002975}
2976
2977static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2978{
2979 struct gaudi_device *gaudi = hdev->asic_specific;
2980 struct gaudi_internal_qman_info *q;
2981 u64 qman_base_addr;
2982 u32 so_base_hi, tpc_offset = 0;
2983 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2984 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2985 int i, tpc_id, internal_q_index;
2986
2987 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2988 return;
2989
2990 so_base_hi = upper_32_bits(CFG_BASE +
2991 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2992
2993 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2994 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2995 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2996 tpc_id * QMAN_STREAMS + i;
2997 q = &gaudi->internal_qmans[internal_q_index];
2998 qman_base_addr = (u64) q->pq_dma_addr;
2999 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3000 qman_base_addr);
3001
3002 if (i == 3) {
3003 /* Initializing lower CP for TPC QMAN */
3004 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3005
3006 /* Enable the QMAN and TPC channel */
3007 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3008 QMAN_TPC_ENABLE);
3009 }
3010 }
3011
3012 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3013 so_base_hi);
3014
3015 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3016
Oded Gabbay65887292020-08-12 11:21:01 +03003017 gaudi->hw_cap_initialized |=
3018 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003019 }
3020}
3021
Oded Gabbay3c681572020-11-02 21:10:39 +02003022static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3023 int qman_id, u64 qman_base_addr, int nic_id)
3024{
Ofir Bitton5de406c2020-09-10 10:56:26 +03003025 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3026 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbay3c681572020-11-02 21:10:39 +02003027 u32 q_off;
3028 u32 nic_qm_err_cfg;
3029
Ofir Bitton5de406c2020-09-10 10:56:26 +03003030 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3031 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3032 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003033 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003034 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003035 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003036 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02003037 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03003038 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3039 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3040 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3041 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3042 so_base_ws_lo = lower_32_bits(CFG_BASE +
3043 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3044 so_base_ws_hi = upper_32_bits(CFG_BASE +
3045 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbay3c681572020-11-02 21:10:39 +02003046
3047 q_off = nic_offset + qman_id * 4;
3048
3049 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3050 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3051
3052 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3053 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3054 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3055
Ofir Bitton5de406c2020-09-10 10:56:26 +03003056 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3057 QMAN_LDMA_SIZE_OFFSET);
3058 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3059 QMAN_LDMA_SRC_OFFSET);
3060 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3061 QMAN_LDMA_DST_OFFSET);
Oded Gabbay3c681572020-11-02 21:10:39 +02003062
Ofir Bitton5de406c2020-09-10 10:56:26 +03003063 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3064 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3065 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3066 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3067
3068 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3069 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3070 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3071 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3072 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
Oded Gabbay3c681572020-11-02 21:10:39 +02003073
3074 if (qman_id == 0) {
3075 /* Configure RAZWI IRQ */
3076 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3077 if (hdev->stop_on_err) {
3078 nic_qm_err_cfg |=
3079 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3080 }
3081
3082 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3083 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3084 lower_32_bits(CFG_BASE +
3085 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3086 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3087 upper_32_bits(CFG_BASE +
3088 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3089 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3090 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3091 nic_id);
3092
3093 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3094 QM_ARB_ERR_MSG_EN_MASK);
3095
3096 /* Increase ARB WDT to support streams architecture */
3097 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3098 GAUDI_ARB_WDT_TIMEOUT);
3099
3100 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3101 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3102 QMAN_INTERNAL_MAKE_TRUSTED);
3103 }
3104}
3105
3106static void gaudi_init_nic_qmans(struct hl_device *hdev)
3107{
3108 struct gaudi_device *gaudi = hdev->asic_specific;
3109 struct gaudi_internal_qman_info *q;
3110 u64 qman_base_addr;
3111 u32 nic_offset = 0;
3112 u32 nic_delta_between_qmans =
3113 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3114 u32 nic_delta_between_nics =
3115 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3116 int i, nic_id, internal_q_index;
3117
3118 if (!hdev->nic_ports_mask)
3119 return;
3120
3121 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3122 return;
3123
3124 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3125
3126 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
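		/* Skip ports that are disabled in nic_ports_mask, but keep the
		 * register offset bookkeeping below in sync
		 */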
3127 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3128 nic_offset += nic_delta_between_qmans;
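			/* An odd port closes a NIC macro (two QMANs per macro), so
			 * rewind and jump to the next macro's register block
			 */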
3129 if (nic_id & 1) {
3130 nic_offset -= (nic_delta_between_qmans * 2);
3131 nic_offset += nic_delta_between_nics;
3132 }
3133 continue;
3134 }
3135
3136 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3137 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3138 nic_id * QMAN_STREAMS + i;
3139 q = &gaudi->internal_qmans[internal_q_index];
3140 qman_base_addr = (u64) q->pq_dma_addr;
3141 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3142 qman_base_addr, nic_id);
3143 }
3144
3145 /* Enable the QMAN */
3146 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3147
3148 nic_offset += nic_delta_between_qmans;
3149 if (nic_id & 1) {
3150 nic_offset -= (nic_delta_between_qmans * 2);
3151 nic_offset += nic_delta_between_nics;
3152 }
3153
3154 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3155 }
3156}
3157
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003158static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3159{
3160 struct gaudi_device *gaudi = hdev->asic_specific;
3161
3162 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3163 return;
3164
3165 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3166 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3167 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3168}
3169
3170static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3171{
3172 struct gaudi_device *gaudi = hdev->asic_specific;
3173
3174 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3175 return;
3176
3177 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3178 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3179 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3180 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3181 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3182}
3183
3184static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3185{
3186 struct gaudi_device *gaudi = hdev->asic_specific;
3187
3188 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3189 return;
3190
3191 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3192 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3193}
3194
3195static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3196{
3197 struct gaudi_device *gaudi = hdev->asic_specific;
3198 u32 tpc_offset = 0;
3199 int tpc_id;
3200
3201 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3202 return;
3203
3204 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3205 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3206 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3207 }
3208}
3209
Oded Gabbay3c681572020-11-02 21:10:39 +02003210static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3211{
3212 struct gaudi_device *gaudi = hdev->asic_specific;
3213 u32 nic_mask, nic_offset = 0;
3214 u32 nic_delta_between_qmans =
3215 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3216 u32 nic_delta_between_nics =
3217 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3218 int nic_id;
3219
3220 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3221 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3222
3223 if (gaudi->hw_cap_initialized & nic_mask)
3224 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3225
3226 nic_offset += nic_delta_between_qmans;
3227 if (nic_id & 1) {
3228 nic_offset -= (nic_delta_between_qmans * 2);
3229 nic_offset += nic_delta_between_nics;
3230 }
3231 }
3232}
3233
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003234static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3235{
3236 struct gaudi_device *gaudi = hdev->asic_specific;
3237
3238 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3239 return;
3240
3241 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3242 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3243 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3244 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3245}
3246
3247static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3248{
3249 struct gaudi_device *gaudi = hdev->asic_specific;
3250
3251 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3252 return;
3253
3254 /* Stop CPs of HBM DMA QMANs */
3255
3256 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3257 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3258 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3259 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3260 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3261}
3262
3263static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3264{
3265 struct gaudi_device *gaudi = hdev->asic_specific;
3266
3267 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3268 return;
3269
3270 /* Stop CPs of MME QMANs */
3271 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3272 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3273}
3274
3275static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3276{
3277 struct gaudi_device *gaudi = hdev->asic_specific;
3278
3279 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3280 return;
3281
3282 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3283 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3284 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3285 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3286 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3287 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3288 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3289 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3290}
3291
Oded Gabbay3c681572020-11-02 21:10:39 +02003292static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3293{
3294 struct gaudi_device *gaudi = hdev->asic_specific;
3295
3296 /* Stop upper CPs of QMANs */
3297
3298 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3299 WREG32(mmNIC0_QM0_GLBL_CFG1,
3300 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3301 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3302 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3303
3304 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3305 WREG32(mmNIC0_QM1_GLBL_CFG1,
3306 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3307 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3308 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3309
3310 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3311 WREG32(mmNIC1_QM0_GLBL_CFG1,
3312 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3313 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3314 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3315
3316 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3317 WREG32(mmNIC1_QM1_GLBL_CFG1,
3318 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3319 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3320 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3321
3322 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3323 WREG32(mmNIC2_QM0_GLBL_CFG1,
3324 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3325 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3326 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3327
3328 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3329 WREG32(mmNIC2_QM1_GLBL_CFG1,
3330 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3331 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3332 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3333
3334 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3335 WREG32(mmNIC3_QM0_GLBL_CFG1,
3336 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3337 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3338 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3339
3340 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3341 WREG32(mmNIC3_QM1_GLBL_CFG1,
3342 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3343 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3344 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3345
3346 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3347 WREG32(mmNIC4_QM0_GLBL_CFG1,
3348 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3349 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3350 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3351
3352 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3353 WREG32(mmNIC4_QM1_GLBL_CFG1,
3354 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3355 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3356 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3357}
3358
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003359static void gaudi_pci_dma_stall(struct hl_device *hdev)
3360{
3361 struct gaudi_device *gaudi = hdev->asic_specific;
3362
3363 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3364 return;
3365
3366 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3367 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3368 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3369}
3370
3371static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3372{
3373 struct gaudi_device *gaudi = hdev->asic_specific;
3374
3375 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3376 return;
3377
3378 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3379 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3380 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3381 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3382 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3383}
3384
3385static void gaudi_mme_stall(struct hl_device *hdev)
3386{
3387 struct gaudi_device *gaudi = hdev->asic_specific;
3388
3389 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3390 return;
3391
3392 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3393 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3394 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3395 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3396 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3397 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3398 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3399 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3400 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3401 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3402 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3403 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3404 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3405 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3406 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3407 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3408 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3409}
3410
3411static void gaudi_tpc_stall(struct hl_device *hdev)
3412{
3413 struct gaudi_device *gaudi = hdev->asic_specific;
3414
3415 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3416 return;
3417
3418 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3419 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3420 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3421 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3422 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3423 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3424 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3425 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3426}
3427
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003428static void gaudi_set_clock_gating(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003429{
3430 struct gaudi_device *gaudi = hdev->asic_specific;
3431 u32 qman_offset;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003432 bool enable;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003433 int i;
3434
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003435	/* If we are in a debug session, don't enable clock gating as it
3436	 * may interfere
3437	 */
3438 if (hdev->in_debug)
3439 return;
3440
Oded Gabbay0024c0942020-12-05 22:55:09 +02003441 if (!hdev->asic_prop.fw_security_disabled)
3442 return;
3443
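	/* Each bit in clock_gating_mask corresponds to an engine ID; enable the
	 * QMAN clock gating registers only for engines whose bit is set
	 */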
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003444 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003445 enable = !!(hdev->clock_gating_mask &
3446 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003447
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003448 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003449 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3450 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003451 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003452 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003453 }
3454
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003455 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003456 enable = !!(hdev->clock_gating_mask &
3457 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003458
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003459 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003460 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3461 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003462 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003463 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003464 }
3465
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003466 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3467 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3468 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003469
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003470 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3471 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3472 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003473
3474 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003475 enable = !!(hdev->clock_gating_mask &
3476 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003477
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003478 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003479 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003480 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003481 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003482
3483 qman_offset += TPC_QMAN_OFFSET;
3484 }
3485
3486 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3487}
3488
3489static void gaudi_disable_clock_gating(struct hl_device *hdev)
3490{
3491 struct gaudi_device *gaudi = hdev->asic_specific;
3492 u32 qman_offset;
3493 int i;
3494
Oded Gabbay0024c0942020-12-05 22:55:09 +02003495 if (!hdev->asic_prop.fw_security_disabled)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003496 return;
3497
3498 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3499 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3500 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3501
3502 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3503 }
3504
3505 WREG32(mmMME0_QM_CGM_CFG, 0);
3506 WREG32(mmMME0_QM_CGM_CFG1, 0);
3507 WREG32(mmMME2_QM_CGM_CFG, 0);
3508 WREG32(mmMME2_QM_CGM_CFG1, 0);
3509
3510 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3511 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3512 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3513
3514 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3515 }
3516
3517 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3518}
3519
3520static void gaudi_enable_timestamp(struct hl_device *hdev)
3521{
3522 /* Disable the timestamp counter */
3523 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3524
3525 /* Zero the lower/upper parts of the 64-bit counter */
3526 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3527 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3528
3529 /* Enable the counter */
3530 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3531}
3532
3533static void gaudi_disable_timestamp(struct hl_device *hdev)
3534{
3535 /* Disable the timestamp counter */
3536 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3537}
3538
3539static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3540{
Oded Gabbayc83c4172020-07-05 15:48:34 +03003541 u32 wait_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003542
3543 dev_info(hdev->dev,
3544 "Halting compute engines and disabling interrupts\n");
3545
Oded Gabbayc83c4172020-07-05 15:48:34 +03003546 if (hdev->pldm)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003547 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003548 else
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003549 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003550
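	/* Halt order: stop the QMAN command processors first, then stall the
	 * engine cores, and only then disable the QMANs entirely
	 */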
Oded Gabbay3c681572020-11-02 21:10:39 +02003551 gaudi_stop_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003552 gaudi_stop_mme_qmans(hdev);
3553 gaudi_stop_tpc_qmans(hdev);
3554 gaudi_stop_hbm_dma_qmans(hdev);
3555 gaudi_stop_pci_dma_qmans(hdev);
3556
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003557 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003558
3559 msleep(wait_timeout_ms);
3560
3561 gaudi_pci_dma_stall(hdev);
3562 gaudi_hbm_dma_stall(hdev);
3563 gaudi_tpc_stall(hdev);
3564 gaudi_mme_stall(hdev);
3565
3566 msleep(wait_timeout_ms);
3567
Oded Gabbay3c681572020-11-02 21:10:39 +02003568 gaudi_disable_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003569 gaudi_disable_mme_qmans(hdev);
3570 gaudi_disable_tpc_qmans(hdev);
3571 gaudi_disable_hbm_dma_qmans(hdev);
3572 gaudi_disable_pci_dma_qmans(hdev);
3573
3574 gaudi_disable_timestamp(hdev);
3575
Oded Gabbay12ae3132020-07-03 20:58:23 +03003576 gaudi_disable_msi(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003577}
3578
3579static int gaudi_mmu_init(struct hl_device *hdev)
3580{
3581 struct asic_fixed_properties *prop = &hdev->asic_prop;
3582 struct gaudi_device *gaudi = hdev->asic_specific;
3583 u64 hop0_addr;
3584 int rc, i;
3585
3586 if (!hdev->mmu_enable)
3587 return 0;
3588
3589 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3590 return 0;
3591
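	/* Program the hop-0 page table base address for every supported ASID */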
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003592 for (i = 0 ; i < prop->max_asid ; i++) {
3593 hop0_addr = prop->mmu_pgt_addr +
3594 (i * prop->mmu_hop_table_size);
3595
3596 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3597 if (rc) {
3598 dev_err(hdev->dev,
3599 "failed to set hop0 addr for asid %d\n", i);
3600 goto err;
3601 }
3602 }
3603
3604	/* Init the MMU cache management page */
3605 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3606 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3607
Tomer Tayar644883e2020-07-19 11:00:03 +03003608 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003609
3610 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3611 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3612
3613 WREG32(mmSTLB_HOP_CONFIGURATION,
3614 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3615
Omer Shpigelmancfd41762020-06-03 13:03:35 +03003616 /*
3617 * The H/W expects the first PI after init to be 1. After wraparound
3618 * we'll write 0.
3619 */
3620 gaudi->mmu_cache_inv_pi = 1;
3621
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003622 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3623
3624 return 0;
3625
3626err:
3627 return rc;
3628}
3629
3630static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3631{
3632 void __iomem *dst;
3633
3634 /* HBM scrambler must be initialized before pushing F/W to HBM */
3635 gaudi_init_scrambler_hbm(hdev);
3636
3637 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3638
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003639 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003640}
3641
3642static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3643{
3644 void __iomem *dst;
3645
3646 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3647
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003648 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003649}
3650
farah kassabrieb10b892020-10-14 15:17:36 +03003651static int gaudi_read_device_fw_version(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003652 enum hl_fw_component fwc)
3653{
3654 const char *name;
3655 u32 ver_off;
3656 char *dest;
3657
3658 switch (fwc) {
3659 case FW_COMP_UBOOT:
3660 ver_off = RREG32(mmUBOOT_VER_OFFSET);
3661 dest = hdev->asic_prop.uboot_ver;
3662 name = "U-Boot";
3663 break;
3664 case FW_COMP_PREBOOT:
3665 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3666 dest = hdev->asic_prop.preboot_ver;
3667 name = "Preboot";
3668 break;
3669 default:
3670 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
farah kassabrieb10b892020-10-14 15:17:36 +03003671 return -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003672 }
3673
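	/* The version register holds an absolute SRAM address; strip the SRAM
	 * base to get an offset inside the SRAM BAR
	 */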
3674 ver_off &= ~((u32)SRAM_BASE_ADDR);
3675
3676 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3677 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3678 VERSION_MAX_LEN);
3679 } else {
3680 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3681 name, ver_off);
3682 strcpy(dest, "unavailable");
farah kassabrieb10b892020-10-14 15:17:36 +03003683 return -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003684 }
farah kassabrieb10b892020-10-14 15:17:36 +03003685
3686 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003687}
3688
3689static int gaudi_init_cpu(struct hl_device *hdev)
3690{
3691 struct gaudi_device *gaudi = hdev->asic_specific;
3692 int rc;
3693
3694 if (!hdev->cpu_enable)
3695 return 0;
3696
3697 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3698 return 0;
3699
3700 /*
3701	 * The device CPU works with 40-bit addresses.
3702 * This register sets the extension to 50 bits.
3703 */
Ofir Bittonc692dec2020-10-04 17:34:37 +03003704 if (hdev->asic_prop.fw_security_disabled)
3705 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003706
3707 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3708 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3709 mmCPU_CMD_STATUS_TO_HOST,
Ofir Bitton323b7262020-10-04 09:09:19 +03003710 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003711 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3712 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3713
3714 if (rc)
3715 return rc;
3716
3717 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3718
3719 return 0;
3720}
3721
3722static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3723{
3724 struct gaudi_device *gaudi = hdev->asic_specific;
3725 struct hl_eq *eq;
3726 u32 status;
3727 struct hl_hw_queue *cpu_pq =
3728 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3729 int err;
3730
3731 if (!hdev->cpu_queues_enable)
3732 return 0;
3733
3734 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3735 return 0;
3736
3737 eq = &hdev->event_queue;
3738
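	/* Let the device CPU know where the PQ, EQ and CQ buffers reside in
	 * host memory
	 */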
3739 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3740 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3741
3742 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3743 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3744
3745 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3746 lower_32_bits(hdev->cpu_accessible_dma_address));
3747 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3748 upper_32_bits(hdev->cpu_accessible_dma_address));
3749
3750 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3751 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3752 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3753
3754 /* Used for EQ CI */
3755 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3756
3757 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3758
3759 if (gaudi->multi_msi_mode)
3760 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3761 else
3762 WREG32(mmCPU_IF_QUEUE_INIT,
3763 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3764
3765 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3766
3767 err = hl_poll_timeout(
3768 hdev,
3769 mmCPU_IF_QUEUE_INIT,
3770 status,
3771 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3772 1000,
3773 cpu_timeout);
3774
3775 if (err) {
3776 dev_err(hdev->dev,
Oded Gabbay6138bbe2020-09-04 20:18:16 +03003777 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003778 return -EIO;
3779 }
3780
3781 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3782 return 0;
3783}
3784
3785static void gaudi_pre_hw_init(struct hl_device *hdev)
3786{
3787 /* Perform read from the device to make sure device is up */
Oded Gabbay377182a2020-12-09 19:50:46 +02003788 RREG32(mmHW_STATE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003789
Ofir Bittonc692dec2020-10-04 17:34:37 +03003790 if (hdev->asic_prop.fw_security_disabled) {
3791 /* Set the access through PCI bars (Linux driver only) as
3792 * secured
3793 */
3794 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3795 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3796 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
Oded Gabbay57799ce2020-09-13 15:51:28 +03003797
Ofir Bittonc692dec2020-10-04 17:34:37 +03003798 /* Perform read to flush the waiting writes to ensure
3799 * configuration was set in the device
3800 */
3801 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3802 }
Oded Gabbay57799ce2020-09-13 15:51:28 +03003803
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003804 /*
3805 * Let's mark in the H/W that we have reached this point. We check
3806 * this value in the reset_before_init function to understand whether
3807 * we need to reset the chip before doing H/W init. This register is
3808 * cleared by the H/W upon H/W reset
3809 */
3810 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003811}
3812
3813static int gaudi_hw_init(struct hl_device *hdev)
3814{
3815 int rc;
3816
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003817 gaudi_pre_hw_init(hdev);
3818
3819 gaudi_init_pci_dma_qmans(hdev);
3820
3821 gaudi_init_hbm_dma_qmans(hdev);
3822
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003823 rc = gaudi_init_cpu(hdev);
3824 if (rc) {
3825 dev_err(hdev->dev, "failed to initialize CPU\n");
3826 return rc;
3827 }
3828
Oded Gabbay0024c0942020-12-05 22:55:09 +02003829	/* If clock gating was enabled in preboot, we need to disable it
3830	 * here before touching the MME/TPC registers.
3831	 * There is no need to take the clock gating mutex because when this
3832	 * function runs, no other relevant code can run
3833	 */
3834 hdev->asic_funcs->disable_clock_gating(hdev);
3835
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003836 /* SRAM scrambler must be initialized after CPU is running from HBM */
3837 gaudi_init_scrambler_sram(hdev);
3838
3839 /* This is here just in case we are working without CPU */
3840 gaudi_init_scrambler_hbm(hdev);
3841
3842 gaudi_init_golden_registers(hdev);
3843
3844 rc = gaudi_mmu_init(hdev);
3845 if (rc)
3846 return rc;
3847
Omer Shpigelman3a3a5bf12020-05-11 10:45:12 +03003848 gaudi_init_security(hdev);
3849
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003850 gaudi_init_mme_qmans(hdev);
3851
3852 gaudi_init_tpc_qmans(hdev);
3853
Oded Gabbay3c681572020-11-02 21:10:39 +02003854 gaudi_init_nic_qmans(hdev);
3855
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003856 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003857
3858 gaudi_enable_timestamp(hdev);
3859
Oded Gabbay3c681572020-11-02 21:10:39 +02003860 /* MSI must be enabled before CPU queues and NIC are initialized */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003861 rc = gaudi_enable_msi(hdev);
3862 if (rc)
3863 goto disable_queues;
3864
3865 /* must be called after MSI was enabled */
3866 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3867 if (rc) {
3868 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3869 rc);
3870 goto disable_msi;
3871 }
3872
3873 /* Perform read from the device to flush all configuration */
Oded Gabbay377182a2020-12-09 19:50:46 +02003874 RREG32(mmHW_STATE);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003875
3876 return 0;
3877
3878disable_msi:
3879 gaudi_disable_msi(hdev);
3880disable_queues:
3881 gaudi_disable_mme_qmans(hdev);
3882 gaudi_disable_pci_dma_qmans(hdev);
3883
3884 return rc;
3885}
3886
3887static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3888{
3889 struct gaudi_device *gaudi = hdev->asic_specific;
Igor Grinbergb726a2f2020-10-29 14:06:54 +02003890 u32 status, reset_timeout_ms, cpu_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003891
Oded Gabbay12ae3132020-07-03 20:58:23 +03003892 if (!hard_reset) {
3893 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3894 return;
3895 }
3896
Oded Gabbayc83c4172020-07-05 15:48:34 +03003897 if (hdev->pldm) {
Oded Gabbay12ae3132020-07-03 20:58:23 +03003898 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003899 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3900 } else {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003901 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003902 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3903 }
3904
3905	/* Set device to handle FLR by H/W as we will put the device CPU
3906	 * into halt mode
3907 */
Oded Gabbaya63c3fb2020-11-26 18:11:05 +02003908 if (hdev->asic_prop.fw_security_disabled &&
3909 !hdev->asic_prop.hard_reset_done_by_fw)
Ofir Bittonb90c8942020-11-08 12:59:04 +02003910 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
Oded Gabbayc83c4172020-07-05 15:48:34 +03003911 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3912
3913	/* We don't know the state of the CPU, so make sure it is
3914	 * stopped by any means necessary
3915 */
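	/* If the F/W performs the hard reset, ask it to reset the device;
	 * otherwise just park the device CPU in WFE
	 */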
Ofir Bitton9c9013c2020-12-01 10:39:54 +02003916 if (hdev->asic_prop.hard_reset_done_by_fw)
3917 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
3918 else
3919 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
Ofir Bittonb90c8942020-11-08 12:59:04 +02003920
Oded Gabbayc83c4172020-07-05 15:48:34 +03003921 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3922
Oded Gabbaya63c3fb2020-11-26 18:11:05 +02003923 if (hdev->asic_prop.fw_security_disabled &&
3924 !hdev->asic_prop.hard_reset_done_by_fw) {
3925
3926 /* Configure the reset registers. Must be done as early as
3927 * possible in case we fail during H/W initialization
3928 */
3929 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
3930 (CFG_RST_H_DMA_MASK |
3931 CFG_RST_H_MME_MASK |
3932 CFG_RST_H_SM_MASK |
3933 CFG_RST_H_TPC_7_MASK));
3934
3935 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
3936
3937 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
3938 (CFG_RST_H_HBM_MASK |
3939 CFG_RST_H_TPC_7_MASK |
3940 CFG_RST_H_NIC_MASK |
3941 CFG_RST_H_SM_MASK |
3942 CFG_RST_H_DMA_MASK |
3943 CFG_RST_H_MME_MASK |
3944 CFG_RST_H_CPU_MASK |
3945 CFG_RST_H_MMU_MASK));
3946
3947 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
3948 (CFG_RST_L_IF_MASK |
3949 CFG_RST_L_PSOC_MASK |
3950 CFG_RST_L_TPC_MASK));
3951
Ofir Bittonb90c8942020-11-08 12:59:04 +02003952 msleep(cpu_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003953
Ofir Bittonb90c8942020-11-08 12:59:04 +02003954 /* Tell ASIC not to re-initialize PCIe */
3955 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003956
Ofir Bittonb90c8942020-11-08 12:59:04 +02003957 /* Restart BTL/BLR upon hard-reset */
3958 if (hdev->asic_prop.fw_security_disabled)
3959 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003960
Ofir Bittonb90c8942020-11-08 12:59:04 +02003961 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
Oded Gabbay12ae3132020-07-03 20:58:23 +03003962 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
Ofir Bittonb90c8942020-11-08 12:59:04 +02003963
Oded Gabbay13d0ee12020-12-06 23:48:45 +02003964 dev_info(hdev->dev,
3965 "Issued HARD reset command, going to wait %dms\n",
3966 reset_timeout_ms);
3967 } else {
3968 dev_info(hdev->dev,
3969 "Firmware performs HARD reset, going to wait %dms\n",
3970 reset_timeout_ms);
3971 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003972
3973 /*
3974 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3975 * itself is in reset. Need to wait until the reset is deasserted
3976 */
3977 msleep(reset_timeout_ms);
3978
3979 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3980 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3981 dev_err(hdev->dev,
3982 "Timeout while waiting for device to reset 0x%x\n",
3983 status);
3984
farah kassabrieb10b892020-10-14 15:17:36 +03003985 if (gaudi) {
3986 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3987 HW_CAP_HBM | HW_CAP_PCI_DMA |
3988 HW_CAP_MME | HW_CAP_TPC_MASK |
3989 HW_CAP_HBM_DMA | HW_CAP_PLL |
3990 HW_CAP_NIC_MASK | HW_CAP_MMU |
3991 HW_CAP_SRAM_SCRAMBLER |
3992 HW_CAP_HBM_SCRAMBLER |
3993 HW_CAP_CLK_GATE);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003994
farah kassabrieb10b892020-10-14 15:17:36 +03003995 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3996 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003997}
3998
3999static int gaudi_suspend(struct hl_device *hdev)
4000{
4001 int rc;
4002
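	/* Ask the device CPU to stop accessing host memory over PCI before the
	 * host suspends
	 */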
Oded Gabbay2f553422020-08-15 16:28:10 +03004003 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004004 if (rc)
4005 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4006
4007 return rc;
4008}
4009
4010static int gaudi_resume(struct hl_device *hdev)
4011{
4012 return gaudi_init_iatu(hdev);
4013}
4014
4015static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
Hillf Danton0db57532020-08-23 07:32:42 +08004016 void *cpu_addr, dma_addr_t dma_addr, size_t size)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004017{
4018 int rc;
4019
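	/* Mark the mapping as device memory: not copied on fork, not dumped,
	 * not expandable and with no swap space reserved
	 */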
4020 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4021 VM_DONTCOPY | VM_NORESERVE;
4022
Oded Gabbaya9d4ef62021-01-11 13:49:38 +02004023 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4024 (dma_addr - HOST_PHYS_BASE), size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004025 if (rc)
Hillf Danton0db57532020-08-23 07:32:42 +08004026 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004027
4028 return rc;
4029}
4030
4031static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4032{
4033 struct gaudi_device *gaudi = hdev->asic_specific;
4034 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
4035 int dma_id;
4036 bool invalid_queue = false;
4037
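	/* Translate the H/W queue ID to the matching QMAN PQ_PI doorbell
	 * register
	 */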
4038 switch (hw_queue_id) {
4039 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4040 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4041 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4042 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4043 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4044 break;
4045
4046 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4047 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4048 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4049 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4050 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4051 break;
4052
4053 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4054 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4055 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4056 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4057 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4058 break;
4059
4060 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4061 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4062 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4063 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4064 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4065 break;
4066
4067 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4068 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4069 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4070 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4071 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4072 break;
4073
4074 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004075 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4076 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4077 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4078 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4079 break;
4080
Ofir Bitton0940cab2020-08-31 08:52:56 +03004081 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004082 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4083 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4084 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4085 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4086 break;
4087
Ofir Bitton0940cab2020-08-31 08:52:56 +03004088 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4089 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4090 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4091 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4092 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4093 break;
4094
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004095 case GAUDI_QUEUE_ID_CPU_PQ:
4096 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4097 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4098 else
4099 invalid_queue = true;
4100 break;
4101
4102 case GAUDI_QUEUE_ID_MME_0_0:
4103 db_reg_offset = mmMME2_QM_PQ_PI_0;
4104 break;
4105
4106 case GAUDI_QUEUE_ID_MME_0_1:
4107 db_reg_offset = mmMME2_QM_PQ_PI_1;
4108 break;
4109
4110 case GAUDI_QUEUE_ID_MME_0_2:
4111 db_reg_offset = mmMME2_QM_PQ_PI_2;
4112 break;
4113
4114 case GAUDI_QUEUE_ID_MME_0_3:
4115 db_reg_offset = mmMME2_QM_PQ_PI_3;
4116 break;
4117
4118 case GAUDI_QUEUE_ID_MME_1_0:
4119 db_reg_offset = mmMME0_QM_PQ_PI_0;
4120 break;
4121
4122 case GAUDI_QUEUE_ID_MME_1_1:
4123 db_reg_offset = mmMME0_QM_PQ_PI_1;
4124 break;
4125
4126 case GAUDI_QUEUE_ID_MME_1_2:
4127 db_reg_offset = mmMME0_QM_PQ_PI_2;
4128 break;
4129
4130 case GAUDI_QUEUE_ID_MME_1_3:
4131 db_reg_offset = mmMME0_QM_PQ_PI_3;
4132 break;
4133
4134 case GAUDI_QUEUE_ID_TPC_0_0:
4135 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4136 break;
4137
4138 case GAUDI_QUEUE_ID_TPC_0_1:
4139 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4140 break;
4141
4142 case GAUDI_QUEUE_ID_TPC_0_2:
4143 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4144 break;
4145
4146 case GAUDI_QUEUE_ID_TPC_0_3:
4147 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4148 break;
4149
4150 case GAUDI_QUEUE_ID_TPC_1_0:
4151 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4152 break;
4153
4154 case GAUDI_QUEUE_ID_TPC_1_1:
4155 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4156 break;
4157
4158 case GAUDI_QUEUE_ID_TPC_1_2:
4159 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4160 break;
4161
4162 case GAUDI_QUEUE_ID_TPC_1_3:
4163 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4164 break;
4165
4166 case GAUDI_QUEUE_ID_TPC_2_0:
4167 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4168 break;
4169
4170 case GAUDI_QUEUE_ID_TPC_2_1:
4171 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4172 break;
4173
4174 case GAUDI_QUEUE_ID_TPC_2_2:
4175 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4176 break;
4177
4178 case GAUDI_QUEUE_ID_TPC_2_3:
4179 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4180 break;
4181
4182 case GAUDI_QUEUE_ID_TPC_3_0:
4183 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4184 break;
4185
4186 case GAUDI_QUEUE_ID_TPC_3_1:
4187 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4188 break;
4189
4190 case GAUDI_QUEUE_ID_TPC_3_2:
4191 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4192 break;
4193
4194 case GAUDI_QUEUE_ID_TPC_3_3:
4195 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4196 break;
4197
4198 case GAUDI_QUEUE_ID_TPC_4_0:
4199 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4200 break;
4201
4202 case GAUDI_QUEUE_ID_TPC_4_1:
4203 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4204 break;
4205
4206 case GAUDI_QUEUE_ID_TPC_4_2:
4207 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4208 break;
4209
4210 case GAUDI_QUEUE_ID_TPC_4_3:
4211 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4212 break;
4213
4214 case GAUDI_QUEUE_ID_TPC_5_0:
4215 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4216 break;
4217
4218 case GAUDI_QUEUE_ID_TPC_5_1:
4219 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4220 break;
4221
4222 case GAUDI_QUEUE_ID_TPC_5_2:
4223 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4224 break;
4225
4226 case GAUDI_QUEUE_ID_TPC_5_3:
4227 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4228 break;
4229
4230 case GAUDI_QUEUE_ID_TPC_6_0:
4231 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4232 break;
4233
4234 case GAUDI_QUEUE_ID_TPC_6_1:
4235 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4236 break;
4237
4238 case GAUDI_QUEUE_ID_TPC_6_2:
4239 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4240 break;
4241
4242 case GAUDI_QUEUE_ID_TPC_6_3:
4243 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4244 break;
4245
4246 case GAUDI_QUEUE_ID_TPC_7_0:
4247 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4248 break;
4249
4250 case GAUDI_QUEUE_ID_TPC_7_1:
4251 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4252 break;
4253
4254 case GAUDI_QUEUE_ID_TPC_7_2:
4255 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4256 break;
4257
4258 case GAUDI_QUEUE_ID_TPC_7_3:
4259 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4260 break;
4261
Oded Gabbay3c681572020-11-02 21:10:39 +02004262 case GAUDI_QUEUE_ID_NIC_0_0:
4263 db_reg_offset = mmNIC0_QM0_PQ_PI_0;
4264 break;
4265
4266 case GAUDI_QUEUE_ID_NIC_0_1:
4267 db_reg_offset = mmNIC0_QM0_PQ_PI_1;
4268 break;
4269
4270 case GAUDI_QUEUE_ID_NIC_0_2:
4271 db_reg_offset = mmNIC0_QM0_PQ_PI_2;
4272 break;
4273
4274 case GAUDI_QUEUE_ID_NIC_0_3:
4275 db_reg_offset = mmNIC0_QM0_PQ_PI_3;
4276 break;
4277
4278 case GAUDI_QUEUE_ID_NIC_1_0:
4279 db_reg_offset = mmNIC0_QM1_PQ_PI_0;
4280 break;
4281
4282 case GAUDI_QUEUE_ID_NIC_1_1:
4283 db_reg_offset = mmNIC0_QM1_PQ_PI_1;
4284 break;
4285
4286 case GAUDI_QUEUE_ID_NIC_1_2:
4287 db_reg_offset = mmNIC0_QM1_PQ_PI_2;
4288 break;
4289
4290 case GAUDI_QUEUE_ID_NIC_1_3:
4291 db_reg_offset = mmNIC0_QM1_PQ_PI_3;
4292 break;
4293
4294 case GAUDI_QUEUE_ID_NIC_2_0:
4295 db_reg_offset = mmNIC1_QM0_PQ_PI_0;
4296 break;
4297
4298 case GAUDI_QUEUE_ID_NIC_2_1:
4299 db_reg_offset = mmNIC1_QM0_PQ_PI_1;
4300 break;
4301
4302 case GAUDI_QUEUE_ID_NIC_2_2:
4303 db_reg_offset = mmNIC1_QM0_PQ_PI_2;
4304 break;
4305
4306 case GAUDI_QUEUE_ID_NIC_2_3:
4307 db_reg_offset = mmNIC1_QM0_PQ_PI_3;
4308 break;
4309
4310 case GAUDI_QUEUE_ID_NIC_3_0:
4311 db_reg_offset = mmNIC1_QM1_PQ_PI_0;
4312 break;
4313
4314 case GAUDI_QUEUE_ID_NIC_3_1:
4315 db_reg_offset = mmNIC1_QM1_PQ_PI_1;
4316 break;
4317
4318 case GAUDI_QUEUE_ID_NIC_3_2:
4319 db_reg_offset = mmNIC1_QM1_PQ_PI_2;
4320 break;
4321
4322 case GAUDI_QUEUE_ID_NIC_3_3:
4323 db_reg_offset = mmNIC1_QM1_PQ_PI_3;
4324 break;
4325
4326 case GAUDI_QUEUE_ID_NIC_4_0:
4327 db_reg_offset = mmNIC2_QM0_PQ_PI_0;
4328 break;
4329
4330 case GAUDI_QUEUE_ID_NIC_4_1:
4331 db_reg_offset = mmNIC2_QM0_PQ_PI_1;
4332 break;
4333
4334 case GAUDI_QUEUE_ID_NIC_4_2:
4335 db_reg_offset = mmNIC2_QM0_PQ_PI_2;
4336 break;
4337
4338 case GAUDI_QUEUE_ID_NIC_4_3:
4339 db_reg_offset = mmNIC2_QM0_PQ_PI_3;
4340 break;
4341
4342 case GAUDI_QUEUE_ID_NIC_5_0:
4343 db_reg_offset = mmNIC2_QM1_PQ_PI_0;
4344 break;
4345
4346 case GAUDI_QUEUE_ID_NIC_5_1:
4347 db_reg_offset = mmNIC2_QM1_PQ_PI_1;
4348 break;
4349
4350 case GAUDI_QUEUE_ID_NIC_5_2:
4351 db_reg_offset = mmNIC2_QM1_PQ_PI_2;
4352 break;
4353
4354 case GAUDI_QUEUE_ID_NIC_5_3:
4355 db_reg_offset = mmNIC2_QM1_PQ_PI_3;
4356 break;
4357
4358 case GAUDI_QUEUE_ID_NIC_6_0:
4359 db_reg_offset = mmNIC3_QM0_PQ_PI_0;
4360 break;
4361
4362 case GAUDI_QUEUE_ID_NIC_6_1:
4363 db_reg_offset = mmNIC3_QM0_PQ_PI_1;
4364 break;
4365
4366 case GAUDI_QUEUE_ID_NIC_6_2:
4367 db_reg_offset = mmNIC3_QM0_PQ_PI_2;
4368 break;
4369
4370 case GAUDI_QUEUE_ID_NIC_6_3:
4371 db_reg_offset = mmNIC3_QM0_PQ_PI_3;
4372 break;
4373
4374 case GAUDI_QUEUE_ID_NIC_7_0:
4375 db_reg_offset = mmNIC3_QM1_PQ_PI_0;
4376 break;
4377
4378 case GAUDI_QUEUE_ID_NIC_7_1:
4379 db_reg_offset = mmNIC3_QM1_PQ_PI_1;
4380 break;
4381
4382 case GAUDI_QUEUE_ID_NIC_7_2:
4383 db_reg_offset = mmNIC3_QM1_PQ_PI_2;
4384 break;
4385
4386 case GAUDI_QUEUE_ID_NIC_7_3:
4387 db_reg_offset = mmNIC3_QM1_PQ_PI_3;
4388 break;
4389
4390 case GAUDI_QUEUE_ID_NIC_8_0:
4391 db_reg_offset = mmNIC4_QM0_PQ_PI_0;
4392 break;
4393
4394 case GAUDI_QUEUE_ID_NIC_8_1:
4395 db_reg_offset = mmNIC4_QM0_PQ_PI_1;
4396 break;
4397
4398 case GAUDI_QUEUE_ID_NIC_8_2:
4399 db_reg_offset = mmNIC4_QM0_PQ_PI_2;
4400 break;
4401
4402 case GAUDI_QUEUE_ID_NIC_8_3:
4403 db_reg_offset = mmNIC4_QM0_PQ_PI_3;
4404 break;
4405
4406 case GAUDI_QUEUE_ID_NIC_9_0:
4407 db_reg_offset = mmNIC4_QM1_PQ_PI_0;
4408 break;
4409
4410 case GAUDI_QUEUE_ID_NIC_9_1:
4411 db_reg_offset = mmNIC4_QM1_PQ_PI_1;
4412 break;
4413
4414 case GAUDI_QUEUE_ID_NIC_9_2:
4415 db_reg_offset = mmNIC4_QM1_PQ_PI_2;
4416 break;
4417
4418 case GAUDI_QUEUE_ID_NIC_9_3:
4419 db_reg_offset = mmNIC4_QM1_PQ_PI_3;
4420 break;
4421
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004422 default:
4423 invalid_queue = true;
4424 }
4425
4426 if (invalid_queue) {
4427 /* Should never get here */
4428 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4429 hw_queue_id);
4430 return;
4431 }
4432
4433 db_value = pi;
4434
4435 /* ring the doorbell */
4436 WREG32(db_reg_offset, db_value);
4437
4438 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
4439 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
4440 GAUDI_EVENT_PI_UPDATE);
4441}
4442
4443static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4444 struct hl_bd *bd)
4445{
4446 __le64 *pbd = (__le64 *) bd;
4447
4448	/* The QMANs are in host memory so a simple copy suffices */
4449 pqe[0] = pbd[0];
4450 pqe[1] = pbd[1];
4451}
4452
4453static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4454 dma_addr_t *dma_handle, gfp_t flags)
4455{
4456 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4457 dma_handle, flags);
4458
4459 /* Shift to the device's base physical address of host memory */
4460 if (kernel_addr)
4461 *dma_handle += HOST_PHYS_BASE;
4462
4463 return kernel_addr;
4464}
4465
4466static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4467 void *cpu_addr, dma_addr_t dma_handle)
4468{
4469	/* Subtract the device's base physical address of host memory */
4470 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4471
4472 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4473}
4474
farah kassabri03df1362020-05-06 11:17:38 +03004475static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4476{
4477 struct asic_fixed_properties *prop = &hdev->asic_prop;
4478 u64 cur_addr = DRAM_BASE_ADDR_USER;
4479 u32 val;
4480 u32 chunk_size;
4481 int rc, dma_id;
4482
4483 while (cur_addr < prop->dram_end_address) {
4484 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4485 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4486
4487 chunk_size =
4488 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4489
4490 dev_dbg(hdev->dev,
4491 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4492 cur_addr, cur_addr + chunk_size);
4493
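			/* Program a MEM_SET DMA job that fills this chunk with zeros */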
4494 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4495 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4496 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4497 lower_32_bits(cur_addr));
4498 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4499 upper_32_bits(cur_addr));
4500 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4501 chunk_size);
4502 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4503 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4504 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4505
4506 cur_addr += chunk_size;
4507
4508 if (cur_addr == prop->dram_end_address)
4509 break;
4510 }
4511
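		/* Wait for all DMA engines to finish scrubbing their chunks before
		 * starting the next round
		 */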
4512 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4513 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4514
4515 rc = hl_poll_timeout(
4516 hdev,
4517 mmDMA0_CORE_STS0 + dma_offset,
4518 val,
4519 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4520 1000,
4521 HBM_SCRUBBING_TIMEOUT_US);
4522
4523 if (rc) {
4524 dev_err(hdev->dev,
4525 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4526 dma_id);
4527 return -EIO;
4528 }
4529 }
4530 }
4531
4532 return 0;
4533}
4534
4535static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4536{
4537 struct asic_fixed_properties *prop = &hdev->asic_prop;
4538 struct gaudi_device *gaudi = hdev->asic_specific;
4539 u64 idle_mask = 0;
4540 int rc = 0;
4541 u64 val = 0;
4542
4543 if (!hdev->memory_scrub)
4544 return 0;
4545
4546 if (!addr && !size) {
4547 /* Wait till device is idle */
4548 rc = hl_poll_timeout(
4549 hdev,
4550 mmDMA0_CORE_STS0/* dummy */,
4551 val/* dummy */,
4552 (hdev->asic_funcs->is_device_idle(hdev,
4553 &idle_mask, NULL)),
4554 1000,
4555 HBM_SCRUBBING_TIMEOUT_US);
4556 if (rc) {
4557	dev_err(hdev->dev, "Timed out while waiting for device to become idle\n");
4558 return -EIO;
4559 }
4560
4561 /* Scrub SRAM */
4562 addr = prop->sram_user_base_address;
4563 size = hdev->pldm ? 0x10000 :
4564 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4565 val = 0x7777777777777777ull;
4566
4567 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4568 if (rc) {
4569 dev_err(hdev->dev,
4570 "Failed to clear SRAM in mem scrub all\n");
4571 return rc;
4572 }
4573
4574 mutex_lock(&gaudi->clk_gate_mutex);
4575 hdev->asic_funcs->disable_clock_gating(hdev);
4576
4577 /* Scrub HBM using all DMA channels in parallel */
4578 rc = gaudi_hbm_scrubbing(hdev);
4579 if (rc)
4580 dev_err(hdev->dev,
4581 "Failed to clear HBM in mem scrub all\n");
4582
4583 hdev->asic_funcs->set_clock_gating(hdev);
4584 mutex_unlock(&gaudi->clk_gate_mutex);
4585 }
4586
4587 return rc;
4588}
4589
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004590static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4591 u32 queue_id, dma_addr_t *dma_handle,
4592 u16 *queue_len)
4593{
4594 struct gaudi_device *gaudi = hdev->asic_specific;
4595 struct gaudi_internal_qman_info *q;
4596
4597 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4598 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4599 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4600 return NULL;
4601 }
4602
4603 q = &gaudi->internal_qmans[queue_id];
4604 *dma_handle = q->pq_dma_addr;
4605 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4606
4607 return q->pq_kernel_addr;
4608}
4609
4610static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
Alon Mizrahi439bc472020-11-10 13:49:10 +02004611 u16 len, u32 timeout, u64 *result)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004612{
4613 struct gaudi_device *gaudi = hdev->asic_specific;
4614
4615 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4616 if (result)
4617 *result = 0;
4618 return 0;
4619 }
4620
Oded Gabbay788cacf2020-07-07 17:30:13 +03004621 if (!timeout)
4622 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4623
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004624 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4625 timeout, result);
4626}
4627
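/*
 * Queue sanity test: allocate a 4-byte fence in the host DMA pool, build a
 * single MSG_PROT packet that writes GAUDI_QMAN0_FENCE_VAL to the fence
 * address, submit it on the tested queue without a completion, and poll the
 * fence from the host until the value shows up or the timeout expires.
 */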
4628static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4629{
4630 struct packet_msg_prot *fence_pkt;
4631 dma_addr_t pkt_dma_addr;
4632 u32 fence_val, tmp, timeout_usec;
4633 dma_addr_t fence_dma_addr;
4634 u32 *fence_ptr;
4635 int rc;
4636
4637 if (hdev->pldm)
4638 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4639 else
4640 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4641
4642 fence_val = GAUDI_QMAN0_FENCE_VAL;
4643
4644 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4645 &fence_dma_addr);
4646 if (!fence_ptr) {
4647 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004648 "Failed to allocate memory for H/W queue %d testing\n",
4649 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004650 return -ENOMEM;
4651 }
4652
4653 *fence_ptr = 0;
4654
4655 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4656 sizeof(struct packet_msg_prot),
4657 GFP_KERNEL, &pkt_dma_addr);
4658 if (!fence_pkt) {
4659 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004660 "Failed to allocate packet for H/W queue %d testing\n",
4661 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004662 rc = -ENOMEM;
4663 goto free_fence_ptr;
4664 }
4665
Oded Gabbay65887292020-08-12 11:21:01 +03004666 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4667 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4668 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4669
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004670 fence_pkt->ctl = cpu_to_le32(tmp);
4671 fence_pkt->value = cpu_to_le32(fence_val);
4672 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4673
4674 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4675 sizeof(struct packet_msg_prot),
4676 pkt_dma_addr);
4677 if (rc) {
4678 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004679 "Failed to send fence packet to H/W queue %d\n",
4680 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004681 goto free_pkt;
4682 }
4683
4684 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4685 1000, timeout_usec, true);
4686
4687 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4688
4689 if (rc == -ETIMEDOUT) {
4690 dev_err(hdev->dev,
4691 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4692 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4693 rc = -EIO;
4694 }
4695
4696free_pkt:
4697 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4698 pkt_dma_addr);
4699free_fence_ptr:
4700 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4701 fence_dma_addr);
4702 return rc;
4703}
4704
4705static int gaudi_test_cpu_queue(struct hl_device *hdev)
4706{
4707 struct gaudi_device *gaudi = hdev->asic_specific;
4708
4709 /*
4710 	 * Check the capability here because send_cpu_message() won't update
4711 	 * the result value if the CPU queue capability isn't initialized
4712 */
4713 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4714 return 0;
4715
4716 return hl_fw_test_cpu_queue(hdev);
4717}
4718
4719static int gaudi_test_queues(struct hl_device *hdev)
4720{
4721 int i, rc, ret_val = 0;
4722
Ofir Bitton3abc99b2020-06-23 14:50:39 +03004723 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004724 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4725 rc = gaudi_test_queue(hdev, i);
4726 if (rc)
4727 ret_val = -EINVAL;
4728 }
4729 }
4730
4731 rc = gaudi_test_cpu_queue(hdev);
4732 if (rc)
4733 ret_val = -EINVAL;
4734
4735 return ret_val;
4736}
4737
4738static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4739 gfp_t mem_flags, dma_addr_t *dma_handle)
4740{
4741 void *kernel_addr;
4742
4743 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4744 return NULL;
4745
4746 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4747
4748 /* Shift to the device's base physical address of host memory */
4749 if (kernel_addr)
4750 *dma_handle += HOST_PHYS_BASE;
4751
4752 return kernel_addr;
4753}
4754
4755static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4756 dma_addr_t dma_addr)
4757{
4758 /* Cancel the device's base physical address of host memory */
4759 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4760
4761 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4762}
4763
4764static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4765 size_t size, dma_addr_t *dma_handle)
4766{
4767 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4768}
4769
4770static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4771 size_t size, void *vaddr)
4772{
4773 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4774}
4775
4776static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4777 int nents, enum dma_data_direction dir)
4778{
4779 struct scatterlist *sg;
4780 int i;
4781
4782 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4783 return -ENOMEM;
4784
4785 /* Shift to the device's base physical address of host memory */
4786 for_each_sg(sgl, sg, nents, i)
4787 sg->dma_address += HOST_PHYS_BASE;
4788
4789 return 0;
4790}
4791
4792static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4793 int nents, enum dma_data_direction dir)
4794{
4795 struct scatterlist *sg;
4796 int i;
4797
4798 /* Cancel the device's base physical address of host memory */
4799 for_each_sg(sgl, sg, nents, i)
4800 sg->dma_address -= HOST_PHYS_BASE;
4801
4802 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4803}
4804
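/*
 * Compute the size (in bytes) of the LIN_DMA packets needed to cover an SG
 * table: adjacent DMA-mapped entries are merged as long as they are
 * physically contiguous and the merged length stays within
 * DMA_MAX_TRANSFER_SIZE, and each resulting segment costs one
 * packet_lin_dma descriptor.
 */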
4805static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4806 struct sg_table *sgt)
4807{
4808 struct scatterlist *sg, *sg_next_iter;
4809 u32 count, dma_desc_cnt;
4810 u64 len, len_next;
4811 dma_addr_t addr, addr_next;
4812
4813 dma_desc_cnt = 0;
4814
4815 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4816
4817 len = sg_dma_len(sg);
4818 addr = sg_dma_address(sg);
4819
4820 if (len == 0)
4821 break;
4822
4823 while ((count + 1) < sgt->nents) {
4824 sg_next_iter = sg_next(sg);
4825 len_next = sg_dma_len(sg_next_iter);
4826 addr_next = sg_dma_address(sg_next_iter);
4827
4828 if (len_next == 0)
4829 break;
4830
4831 if ((addr + len == addr_next) &&
4832 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4833 len += len_next;
4834 count++;
4835 sg = sg_next_iter;
4836 } else {
4837 break;
4838 }
4839 }
4840
4841 dma_desc_cnt++;
4842 }
4843
4844 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4845}
4846
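/*
 * Pin the user buffer referenced by a LIN_DMA packet (unless it is already
 * pinned for this CS), DMA-map its SG table and account for the descriptors
 * it will add to the patched CB.
 */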
4847static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4848 struct hl_cs_parser *parser,
4849 struct packet_lin_dma *user_dma_pkt,
4850 u64 addr, enum dma_data_direction dir)
4851{
4852 struct hl_userptr *userptr;
4853 int rc;
4854
4855 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4856 parser->job_userptr_list, &userptr))
4857 goto already_pinned;
4858
4859 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
4860 if (!userptr)
4861 return -ENOMEM;
4862
4863 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4864 userptr);
4865 if (rc)
4866 goto free_userptr;
4867
4868 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4869
4870 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
4871 userptr->sgt->nents, dir);
4872 if (rc) {
4873 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4874 goto unpin_memory;
4875 }
4876
4877 userptr->dma_mapped = true;
4878 userptr->dir = dir;
4879
4880already_pinned:
4881 parser->patched_cb_size +=
4882 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4883
4884 return 0;
4885
4886unpin_memory:
4887 hl_unpin_host_memory(hdev, userptr);
4888free_userptr:
4889 kfree(userptr);
4890 return rc;
4891}
4892
4893static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4894 struct hl_cs_parser *parser,
4895 struct packet_lin_dma *user_dma_pkt,
4896 bool src_in_host)
4897{
4898 enum dma_data_direction dir;
4899 bool skip_host_mem_pin = false, user_memset;
4900 u64 addr;
4901 int rc = 0;
4902
4903 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4904 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4905 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4906
4907 if (src_in_host) {
4908 if (user_memset)
4909 skip_host_mem_pin = true;
4910
4911 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4912 dir = DMA_TO_DEVICE;
4913 addr = le64_to_cpu(user_dma_pkt->src_addr);
4914 } else {
4915 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4916 dir = DMA_FROM_DEVICE;
4917 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4918 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4919 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4920 }
4921
4922 if (skip_host_mem_pin)
4923 parser->patched_cb_size += sizeof(*user_dma_pkt);
4924 else
4925 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4926 addr, dir);
4927
4928 return rc;
4929}
4930
4931static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4932 struct hl_cs_parser *parser,
4933 struct packet_lin_dma *user_dma_pkt)
4934{
4935 bool src_in_host = false;
4936 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4937 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4938 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4939
4940 dev_dbg(hdev->dev, "DMA packet details:\n");
4941 dev_dbg(hdev->dev, "source == 0x%llx\n",
4942 le64_to_cpu(user_dma_pkt->src_addr));
4943 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4944 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4945
4946 /*
4947 * Special handling for DMA with size 0. Bypass all validations
4948 * because no transactions will be done except for WR_COMP, which
4949 * is not a security issue
4950 */
4951 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4952 parser->patched_cb_size += sizeof(*user_dma_pkt);
4953 return 0;
4954 }
4955
4956 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4957 src_in_host = true;
4958
4959 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4960 src_in_host);
4961}
4962
Oded Gabbay64536ab2020-05-27 12:38:16 +03004963static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
4964 struct hl_cs_parser *parser,
4965 struct packet_load_and_exe *user_pkt)
4966{
4967 u32 cfg;
4968
4969 cfg = le32_to_cpu(user_pkt->cfg);
4970
4971 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
4972 dev_err(hdev->dev,
4973 "User not allowed to use Load and Execute\n");
4974 return -EPERM;
4975 }
4976
4977 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
4978
4979 return 0;
4980}
4981
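/*
 * First pass over a user CB: walk the packets, reject packet types user
 * code may not submit (MSG_PROT, CP_DMA, STOP, WREG_BULK), validate
 * LIN_DMA and LOAD_AND_EXE packets, and accumulate the size the patched CB
 * will need, plus room for the two trailing MSG_PROT packets when a
 * completion is requested.
 */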
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004982static int gaudi_validate_cb(struct hl_device *hdev,
4983 struct hl_cs_parser *parser, bool is_mmu)
4984{
4985 u32 cb_parsed_length = 0;
4986 int rc = 0;
4987
4988 parser->patched_cb_size = 0;
4989
4990 	/* parser->user_cb_size is more than 0 so the loop is always executed */
4991 while (cb_parsed_length < parser->user_cb_size) {
4992 enum packet_id pkt_id;
4993 u16 pkt_size;
4994 struct gaudi_packet *user_pkt;
4995
Arnd Bergmann82948e62020-10-26 17:08:06 +01004996 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004997
4998 pkt_id = (enum packet_id) (
4999 (le64_to_cpu(user_pkt->header) &
5000 PACKET_HEADER_PACKET_ID_MASK) >>
5001 PACKET_HEADER_PACKET_ID_SHIFT);
5002
Ofir Bittonbc75be22020-07-30 14:56:38 +03005003 if (!validate_packet_id(pkt_id)) {
5004 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5005 rc = -EINVAL;
5006 break;
5007 }
5008
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005009 pkt_size = gaudi_packet_sizes[pkt_id];
5010 cb_parsed_length += pkt_size;
5011 if (cb_parsed_length > parser->user_cb_size) {
5012 dev_err(hdev->dev,
5013 "packet 0x%x is out of CB boundary\n", pkt_id);
5014 rc = -EINVAL;
5015 break;
5016 }
5017
5018 switch (pkt_id) {
5019 case PACKET_MSG_PROT:
5020 dev_err(hdev->dev,
5021 "User not allowed to use MSG_PROT\n");
5022 rc = -EPERM;
5023 break;
5024
5025 case PACKET_CP_DMA:
5026 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5027 rc = -EPERM;
5028 break;
5029
5030 case PACKET_STOP:
5031 dev_err(hdev->dev, "User not allowed to use STOP\n");
5032 rc = -EPERM;
5033 break;
5034
Oded Gabbay2edc66e2020-07-03 19:28:54 +03005035 case PACKET_WREG_BULK:
5036 dev_err(hdev->dev,
5037 "User not allowed to use WREG_BULK\n");
5038 rc = -EPERM;
5039 break;
5040
Oded Gabbay64536ab2020-05-27 12:38:16 +03005041 case PACKET_LOAD_AND_EXE:
5042 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5043 (struct packet_load_and_exe *) user_pkt);
5044 break;
5045
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005046 case PACKET_LIN_DMA:
5047 parser->contains_dma_pkt = true;
5048 if (is_mmu)
5049 parser->patched_cb_size += pkt_size;
5050 else
5051 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5052 (struct packet_lin_dma *) user_pkt);
5053 break;
5054
5055 case PACKET_WREG_32:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005056 case PACKET_MSG_LONG:
5057 case PACKET_MSG_SHORT:
5058 case PACKET_REPEAT:
5059 case PACKET_FENCE:
5060 case PACKET_NOP:
5061 case PACKET_ARB_POINT:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005062 parser->patched_cb_size += pkt_size;
5063 break;
5064
5065 default:
5066 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5067 pkt_id);
5068 rc = -EINVAL;
5069 break;
5070 }
5071
5072 if (rc)
5073 break;
5074 }
5075
5076 /*
5077 * The new CB should have space at the end for two MSG_PROT packets:
5078 * 1. A packet that will act as a completion packet
5079 * 2. A packet that will generate MSI-X interrupt
5080 */
Ofir Bittonac6fdbf2020-12-03 16:59:28 +02005081 if (parser->completion)
5082 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005083
5084 return rc;
5085}
5086
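/*
 * Expand one user LIN_DMA packet into a LIN_DMA packet per merged SG
 * segment of the pinned host buffer. The engine-barrier bit is kept only
 * on the first generated packet, write-completion is cleared on all of
 * them, and the user's original write-completion setting is restored on
 * the last packet.
 */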
5087static int gaudi_patch_dma_packet(struct hl_device *hdev,
5088 struct hl_cs_parser *parser,
5089 struct packet_lin_dma *user_dma_pkt,
5090 struct packet_lin_dma *new_dma_pkt,
5091 u32 *new_dma_pkt_size)
5092{
5093 struct hl_userptr *userptr;
5094 struct scatterlist *sg, *sg_next_iter;
5095 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5096 u64 len, len_next;
5097 dma_addr_t dma_addr, dma_addr_next;
5098 u64 device_memory_addr, addr;
5099 enum dma_data_direction dir;
5100 struct sg_table *sgt;
5101 bool src_in_host = false;
5102 bool skip_host_mem_pin = false;
5103 bool user_memset;
5104
5105 ctl = le32_to_cpu(user_dma_pkt->ctl);
5106
5107 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5108 src_in_host = true;
5109
5110 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5111 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5112
5113 if (src_in_host) {
5114 addr = le64_to_cpu(user_dma_pkt->src_addr);
5115 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5116 dir = DMA_TO_DEVICE;
5117 if (user_memset)
5118 skip_host_mem_pin = true;
5119 } else {
5120 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5121 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5122 dir = DMA_FROM_DEVICE;
5123 }
5124
5125 if ((!skip_host_mem_pin) &&
5126 (!hl_userptr_is_pinned(hdev, addr,
5127 le32_to_cpu(user_dma_pkt->tsize),
5128 parser->job_userptr_list, &userptr))) {
5129 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5130 			addr, le32_to_cpu(user_dma_pkt->tsize));
5131 return -EFAULT;
5132 }
5133
5134 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5135 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5136 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5137 return 0;
5138 }
5139
5140 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5141
5142 sgt = userptr->sgt;
5143 dma_desc_cnt = 0;
5144
5145 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5146 len = sg_dma_len(sg);
5147 dma_addr = sg_dma_address(sg);
5148
5149 if (len == 0)
5150 break;
5151
5152 while ((count + 1) < sgt->nents) {
5153 sg_next_iter = sg_next(sg);
5154 len_next = sg_dma_len(sg_next_iter);
5155 dma_addr_next = sg_dma_address(sg_next_iter);
5156
5157 if (len_next == 0)
5158 break;
5159
5160 if ((dma_addr + len == dma_addr_next) &&
5161 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5162 len += len_next;
5163 count++;
5164 sg = sg_next_iter;
5165 } else {
5166 break;
5167 }
5168 }
5169
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005170 ctl = le32_to_cpu(user_dma_pkt->ctl);
5171 if (likely(dma_desc_cnt))
5172 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5173 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5174 new_dma_pkt->ctl = cpu_to_le32(ctl);
5175 new_dma_pkt->tsize = cpu_to_le32(len);
5176
5177 if (dir == DMA_TO_DEVICE) {
5178 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5179 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5180 } else {
5181 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5182 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5183 }
5184
5185 if (!user_memset)
5186 device_memory_addr += len;
5187 dma_desc_cnt++;
5188 new_dma_pkt++;
5189 }
5190
5191 if (!dma_desc_cnt) {
5192 dev_err(hdev->dev,
5193 			"No SG entries found while patching DMA packet\n");
5194 return -EFAULT;
5195 }
5196
5197 /* Fix the last dma packet - wrcomp must be as user set it */
5198 new_dma_pkt--;
5199 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5200
5201 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5202
5203 return 0;
5204}
5205
5206static int gaudi_patch_cb(struct hl_device *hdev,
5207 struct hl_cs_parser *parser)
5208{
5209 u32 cb_parsed_length = 0;
5210 u32 cb_patched_cur_length = 0;
5211 int rc = 0;
5212
5213 	/* parser->user_cb_size is more than 0 so the loop is always executed */
5214 while (cb_parsed_length < parser->user_cb_size) {
5215 enum packet_id pkt_id;
5216 u16 pkt_size;
5217 u32 new_pkt_size = 0;
5218 struct gaudi_packet *user_pkt, *kernel_pkt;
5219
Arnd Bergmann82948e62020-10-26 17:08:06 +01005220 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5221 kernel_pkt = parser->patched_cb->kernel_address +
5222 cb_patched_cur_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005223
5224 pkt_id = (enum packet_id) (
5225 (le64_to_cpu(user_pkt->header) &
5226 PACKET_HEADER_PACKET_ID_MASK) >>
5227 PACKET_HEADER_PACKET_ID_SHIFT);
5228
Ofir Bittonbc75be22020-07-30 14:56:38 +03005229 if (!validate_packet_id(pkt_id)) {
5230 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5231 rc = -EINVAL;
5232 break;
5233 }
5234
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005235 pkt_size = gaudi_packet_sizes[pkt_id];
5236 cb_parsed_length += pkt_size;
5237 if (cb_parsed_length > parser->user_cb_size) {
5238 dev_err(hdev->dev,
5239 "packet 0x%x is out of CB boundary\n", pkt_id);
5240 rc = -EINVAL;
5241 break;
5242 }
5243
5244 switch (pkt_id) {
5245 case PACKET_LIN_DMA:
5246 rc = gaudi_patch_dma_packet(hdev, parser,
5247 (struct packet_lin_dma *) user_pkt,
5248 (struct packet_lin_dma *) kernel_pkt,
5249 &new_pkt_size);
5250 cb_patched_cur_length += new_pkt_size;
5251 break;
5252
5253 case PACKET_MSG_PROT:
5254 dev_err(hdev->dev,
5255 "User not allowed to use MSG_PROT\n");
5256 rc = -EPERM;
5257 break;
5258
5259 case PACKET_CP_DMA:
5260 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5261 rc = -EPERM;
5262 break;
5263
5264 case PACKET_STOP:
5265 dev_err(hdev->dev, "User not allowed to use STOP\n");
5266 rc = -EPERM;
5267 break;
5268
5269 case PACKET_WREG_32:
5270 case PACKET_WREG_BULK:
5271 case PACKET_MSG_LONG:
5272 case PACKET_MSG_SHORT:
5273 case PACKET_REPEAT:
5274 case PACKET_FENCE:
5275 case PACKET_NOP:
5276 case PACKET_ARB_POINT:
5277 case PACKET_LOAD_AND_EXE:
5278 memcpy(kernel_pkt, user_pkt, pkt_size);
5279 cb_patched_cur_length += pkt_size;
5280 break;
5281
5282 default:
5283 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5284 pkt_id);
5285 rc = -EINVAL;
5286 break;
5287 }
5288
5289 if (rc)
5290 break;
5291 }
5292
5293 return rc;
5294}
5295
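/*
 * CS parsing when the MMU is enabled: the user CB is copied as-is into a
 * kernel-owned patched CB (addresses are translated by the MMU, so no
 * per-packet patching is needed) and then validated, leaving room at the
 * end for the two MSG_PROT completion packets when a completion is
 * requested.
 */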
5296static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5297 struct hl_cs_parser *parser)
5298{
5299 u64 patched_cb_handle;
5300 u32 patched_cb_size;
5301 struct hl_cb *user_cb;
5302 int rc;
5303
5304 /*
5305 	 * The new CB should have space at the end for two MSG_PROT packets:
5306 * 1. A packet that will act as a completion packet
5307 * 2. A packet that will generate MSI interrupt
5308 */
Ofir Bittonac6fdbf2020-12-03 16:59:28 +02005309 if (parser->completion)
5310 parser->patched_cb_size = parser->user_cb_size +
5311 sizeof(struct packet_msg_prot) * 2;
5312 else
5313 parser->patched_cb_size = parser->user_cb_size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005314
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005315 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005316 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005317 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005318
5319 if (rc) {
5320 dev_err(hdev->dev,
5321 			"Failed to allocate patched CB for DMA CS, error %d\n",
5322 rc);
5323 return rc;
5324 }
5325
5326 patched_cb_handle >>= PAGE_SHIFT;
5327 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5328 (u32) patched_cb_handle);
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005329 /* hl_cb_get should never fail */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005330 if (!parser->patched_cb) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005331 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5332 (u32) patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005333 rc = -EFAULT;
5334 goto out;
5335 }
5336
5337 /*
5338 * The check that parser->user_cb_size <= parser->user_cb->size was done
5339 * in validate_queue_index().
5340 */
Arnd Bergmann82948e62020-10-26 17:08:06 +01005341 memcpy(parser->patched_cb->kernel_address,
5342 parser->user_cb->kernel_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005343 parser->user_cb_size);
5344
5345 patched_cb_size = parser->patched_cb_size;
5346
5347 /* Validate patched CB instead of user CB */
5348 user_cb = parser->user_cb;
5349 parser->user_cb = parser->patched_cb;
5350 rc = gaudi_validate_cb(hdev, parser, true);
5351 parser->user_cb = user_cb;
5352
5353 if (rc) {
5354 hl_cb_put(parser->patched_cb);
5355 goto out;
5356 }
5357
5358 if (patched_cb_size != parser->patched_cb_size) {
5359 dev_err(hdev->dev, "user CB size mismatch\n");
5360 hl_cb_put(parser->patched_cb);
5361 rc = -EINVAL;
5362 goto out;
5363 }
5364
5365out:
5366 /*
5367 	 * Always call cb destroy here because we still hold one reference
5368 	 * to it from the earlier cb_get call. After the job is completed,
5369 	 * cb_put will release it, but here we only want to remove it from
5370 	 * the idr
5371 */
5372 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5373 patched_cb_handle << PAGE_SHIFT);
5374
5375 return rc;
5376}
5377
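/*
 * CS parsing when the MMU is disabled: validate the user CB first to learn
 * the patched size, then allocate a patched CB and rewrite it packet by
 * packet, replacing user LIN_DMA packets with packets that use the pinned
 * host addresses.
 */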
5378static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5379 struct hl_cs_parser *parser)
5380{
5381 u64 patched_cb_handle;
5382 int rc;
5383
5384 rc = gaudi_validate_cb(hdev, parser, false);
5385
5386 if (rc)
5387 goto free_userptr;
5388
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005389 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005390 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005391 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005392 if (rc) {
5393 dev_err(hdev->dev,
5394 			"Failed to allocate patched CB for DMA CS, error %d\n", rc);
5395 goto free_userptr;
5396 }
5397
5398 patched_cb_handle >>= PAGE_SHIFT;
5399 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5400 (u32) patched_cb_handle);
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005401 /* hl_cb_get should never fail here */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005402 if (!parser->patched_cb) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02005403 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5404 (u32) patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005405 rc = -EFAULT;
5406 goto out;
5407 }
5408
5409 rc = gaudi_patch_cb(hdev, parser);
5410
5411 if (rc)
5412 hl_cb_put(parser->patched_cb);
5413
5414out:
5415 /*
5416 	 * Always call cb destroy here because we still hold one reference
5417 	 * to it from the earlier cb_get call. After the job is completed,
5418 	 * cb_put will release it, but here we only want to remove it from
5419 	 * the idr
5420 */
5421 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5422 patched_cb_handle << PAGE_SHIFT);
5423
5424free_userptr:
5425 if (rc)
5426 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5427 return rc;
5428}
5429
5430static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5431 struct hl_cs_parser *parser)
5432{
5433 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
Oded Gabbay3c681572020-11-02 21:10:39 +02005434 struct gaudi_device *gaudi = hdev->asic_specific;
5435 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5436 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5437
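	/* Each NIC engine exposes four queues, so the queue offset from
	 * GAUDI_QUEUE_ID_NIC_0_0 divided by 4 selects that engine's
	 * HW_CAP_NIC bit in nic_mask_q_id
	 */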
5438 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5439 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5440 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5441 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5442 parser->hw_queue_id);
5443 return -EINVAL;
5444 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005445
5446 /* For internal queue jobs just check if CB address is valid */
5447 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5448 parser->user_cb_size,
5449 asic_prop->sram_user_base_address,
5450 asic_prop->sram_end_address))
5451 return 0;
5452
5453 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5454 parser->user_cb_size,
5455 asic_prop->dram_user_base_address,
5456 asic_prop->dram_end_address))
5457 return 0;
5458
5459 /* PMMU and HPMMU addresses are equal, check only one of them */
5460 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5461 parser->user_cb_size,
5462 asic_prop->pmmu.start_addr,
5463 asic_prop->pmmu.end_addr))
5464 return 0;
5465
5466 dev_err(hdev->dev,
5467 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5468 parser->user_cb, parser->user_cb_size);
5469
5470 return -EFAULT;
5471}
5472
5473static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5474{
5475 struct gaudi_device *gaudi = hdev->asic_specific;
5476
5477 if (parser->queue_type == QUEUE_TYPE_INT)
5478 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5479
5480 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5481 return gaudi_parse_cb_mmu(hdev, parser);
5482 else
5483 return gaudi_parse_cb_no_mmu(hdev, parser);
5484}
5485
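/*
 * Append the two MSG_PROT packets at the end of an external-queue CB: the
 * first updates the completion queue entry (optionally with an engine
 * barrier) and the second writes to the PCIe MSI register to raise the
 * interrupt. MSI vector 0 is always used unless multi-MSI mode is enabled.
 */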
5486static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
Arnd Bergmann82948e62020-10-26 17:08:06 +01005487 void *kernel_address, u32 len,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005488 u64 cq_addr, u32 cq_val, u32 msi_vec,
5489 bool eb)
5490{
5491 struct gaudi_device *gaudi = hdev->asic_specific;
5492 struct packet_msg_prot *cq_pkt;
5493 u32 tmp;
5494
Arnd Bergmann82948e62020-10-26 17:08:06 +01005495 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005496
Oded Gabbay65887292020-08-12 11:21:01 +03005497 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5498 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005499
5500 if (eb)
Oded Gabbay65887292020-08-12 11:21:01 +03005501 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005502
5503 cq_pkt->ctl = cpu_to_le32(tmp);
5504 cq_pkt->value = cpu_to_le32(cq_val);
5505 cq_pkt->addr = cpu_to_le64(cq_addr);
5506
5507 cq_pkt++;
5508
Oded Gabbay65887292020-08-12 11:21:01 +03005509 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5510 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005511 cq_pkt->ctl = cpu_to_le32(tmp);
5512 cq_pkt->value = cpu_to_le32(1);
5513
5514 if (!gaudi->multi_msi_mode)
5515 msi_vec = 0;
5516
5517 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5518}
5519
5520static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5521{
5522 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5523}
5524
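/*
 * Fill a device memory range with a 64-bit pattern by building a single
 * memset LIN_DMA packet in a kernel CB and running it on DMA channel 0
 * through QMAN0. The DMA0 error-cause register is checked before and after
 * the job, and cleared if the driver is still initializing.
 */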
5525static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5526 u32 size, u64 val)
5527{
5528 struct packet_lin_dma *lin_dma_pkt;
5529 struct hl_cs_job *job;
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005530 u32 cb_size, ctl, err_cause;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005531 struct hl_cb *cb;
5532 int rc;
5533
Ofir Bittona04b7cd2020-07-13 13:36:55 +03005534 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005535 if (!cb)
5536 return -EFAULT;
5537
Arnd Bergmann82948e62020-10-26 17:08:06 +01005538 lin_dma_pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005539 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5540 cb_size = sizeof(*lin_dma_pkt);
5541
Oded Gabbay65887292020-08-12 11:21:01 +03005542 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5543 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5544 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5545 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5546 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5547
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005548 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5549 lin_dma_pkt->src_addr = cpu_to_le64(val);
5550 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5551 lin_dma_pkt->tsize = cpu_to_le32(size);
5552
5553 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5554 if (!job) {
5555 dev_err(hdev->dev, "Failed to allocate a new job\n");
5556 rc = -ENOMEM;
5557 goto release_cb;
5558 }
5559
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005560 /* Verify DMA is OK */
5561 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5562 if (err_cause && !hdev->init_done) {
5563 dev_dbg(hdev->dev,
5564 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5565 err_cause);
5566 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5567 }
5568
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005569 job->id = 0;
5570 job->user_cb = cb;
Tomer Tayarf07486742020-08-02 22:51:31 +03005571 atomic_inc(&job->user_cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005572 job->user_cb_size = cb_size;
5573 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5574 job->patched_cb = job->user_cb;
5575 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5576
5577 hl_debugfs_add_job(hdev, job);
5578
5579 rc = gaudi_send_job_on_qman0(hdev, job);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005580 hl_debugfs_remove_job(hdev, job);
5581 kfree(job);
Tomer Tayarf07486742020-08-02 22:51:31 +03005582 atomic_dec(&cb->cs_cnt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005583
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005584 /* Verify DMA is OK */
5585 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5586 if (err_cause) {
5587 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5588 rc = -EIO;
5589 if (!hdev->init_done) {
5590 dev_dbg(hdev->dev,
5591 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5592 err_cause);
5593 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5594 }
5595 }
5596
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005597release_cb:
5598 hl_cb_put(cb);
5599 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5600
5601 return rc;
5602}
5603
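/*
 * Write the same value to num_regs consecutive registers by building one
 * MSG_LONG packet per register in a kernel CB and executing it immediately
 * through QMAN0 of DMA channel 0.
 */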
Ofir Bitton423815b2021-01-05 09:04:07 +02005604static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5605 u32 num_regs, u32 val)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005606{
Ofir Bitton423815b2021-01-05 09:04:07 +02005607 struct packet_msg_long *pkt;
5608 struct hl_cs_job *job;
5609 u32 cb_size, ctl;
5610 struct hl_cb *cb;
5611 int i, rc;
5612
5613 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5614
5615 if (cb_size > SZ_2M) {
5616 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
5617 return -ENOMEM;
5618 }
5619
5620 cb = hl_cb_kernel_create(hdev, cb_size, false);
5621 if (!cb)
5622 return -EFAULT;
5623
5624 pkt = cb->kernel_address;
5625
5626 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5627 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5628 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5629 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5630 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5631
5632 for (i = 0; i < num_regs ; i++, pkt++) {
5633 pkt->ctl = cpu_to_le32(ctl);
5634 pkt->value = cpu_to_le32(val);
5635 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5636 }
5637
5638 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5639 if (!job) {
5640 dev_err(hdev->dev, "Failed to allocate a new job\n");
5641 rc = -ENOMEM;
5642 goto release_cb;
5643 }
5644
5645 job->id = 0;
5646 job->user_cb = cb;
5647 atomic_inc(&job->user_cb->cs_cnt);
5648 job->user_cb_size = cb_size;
5649 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5650 job->patched_cb = job->user_cb;
5651 job->job_cb_size = cb_size;
5652
5653 hl_debugfs_add_job(hdev, job);
5654
5655 rc = gaudi_send_job_on_qman0(hdev, job);
5656 hl_debugfs_remove_job(hdev, job);
5657 kfree(job);
5658 atomic_dec(&cb->cs_cnt);
5659
5660release_cb:
5661 hl_cb_put(cb);
5662 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5663
5664 return rc;
5665}
5666
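/*
 * Same register memset as above, but when a compute context exists the CB
 * is placed on the context's pending-CB list so it will be submitted later
 * on the given external queue, instead of being executed immediately on
 * QMAN0.
 */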
5667static int gaudi_schedule_register_memset(struct hl_device *hdev,
5668 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5669{
5670 struct hl_ctx *ctx = hdev->compute_ctx;
5671 struct hl_pending_cb *pending_cb;
5672 struct packet_msg_long *pkt;
5673 u32 cb_size, ctl;
5674 struct hl_cb *cb;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005675 int i;
5676
Ofir Bitton423815b2021-01-05 09:04:07 +02005677	/* If no compute context is available, or the context is going down,
5678 	 * memset the registers directly
5679 */
5680 if (!ctx || kref_read(&ctx->refcount) == 0)
5681 return gaudi_memset_registers(hdev, reg_base, num_regs, val);
5682
5683 cb_size = (sizeof(*pkt) * num_regs) +
5684 sizeof(struct packet_msg_prot) * 2;
5685
5686 if (cb_size > SZ_2M) {
5687 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
5688 return -ENOMEM;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005689 }
5690
Ofir Bitton423815b2021-01-05 09:04:07 +02005691 pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5692 if (!pending_cb)
5693 return -ENOMEM;
5694
5695 cb = hl_cb_kernel_create(hdev, cb_size, false);
5696 if (!cb) {
5697 kfree(pending_cb);
5698 return -EFAULT;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005699 }
5700
Ofir Bitton423815b2021-01-05 09:04:07 +02005701 pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005702
Ofir Bitton423815b2021-01-05 09:04:07 +02005703 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5704 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5705 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5706 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5707 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005708
Ofir Bitton423815b2021-01-05 09:04:07 +02005709 for (i = 0; i < num_regs ; i++, pkt++) {
5710 pkt->ctl = cpu_to_le32(ctl);
5711 pkt->value = cpu_to_le32(val);
5712 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5713 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005714
Ofir Bitton423815b2021-01-05 09:04:07 +02005715 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5716
5717 pending_cb->cb = cb;
5718 pending_cb->cb_size = cb_size;
5719 /* The queue ID MUST be an external queue ID. Otherwise, we will
5720 * have undefined behavior
5721 */
5722 pending_cb->hw_queue_id = hw_queue_id;
5723
5724 spin_lock(&ctx->pending_cb_lock);
5725 list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5726 spin_unlock(&ctx->pending_cb_lock);
5727
5728 return 0;
5729}
5730
5731static int gaudi_restore_sm_registers(struct hl_device *hdev)
5732{
5733 u64 base_addr;
5734 u32 num_regs;
5735 int rc;
5736
5737 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5738 num_regs = NUM_OF_SOB_IN_BLOCK;
5739 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5740 if (rc) {
5741 dev_err(hdev->dev, "failed resetting SM registers");
5742 return -ENOMEM;
5743 }
5744
5745 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5746 num_regs = NUM_OF_SOB_IN_BLOCK;
5747 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5748 if (rc) {
5749 dev_err(hdev->dev, "failed resetting SM registers");
5750 return -ENOMEM;
5751 }
5752
5753 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5754 num_regs = NUM_OF_SOB_IN_BLOCK;
5755 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5756 if (rc) {
5757 dev_err(hdev->dev, "failed resetting SM registers");
5758 return -ENOMEM;
5759 }
5760
5761 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5762 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5763 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5764 if (rc) {
5765 dev_err(hdev->dev, "failed resetting SM registers");
5766 return -ENOMEM;
5767 }
5768
5769 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5770 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5771 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5772 if (rc) {
5773 dev_err(hdev->dev, "failed resetting SM registers");
5774 return -ENOMEM;
5775 }
5776
5777 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5778 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5779 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5780 if (rc) {
5781 dev_err(hdev->dev, "failed resetting SM registers");
5782 return -ENOMEM;
5783 }
5784
5785 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5786 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5787 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5788 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5789 if (rc) {
5790 dev_err(hdev->dev, "failed resetting SM registers");
5791 return -ENOMEM;
5792 }
5793
5794 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5795 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5796 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5797 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5798 if (rc) {
5799 dev_err(hdev->dev, "failed resetting SM registers");
5800 return -ENOMEM;
5801 }
5802
5803 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005804}
5805
5806static void gaudi_restore_dma_registers(struct hl_device *hdev)
5807{
5808 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5809 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5810 int i;
5811
5812 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5813 u64 sob_addr = CFG_BASE +
5814 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5815 (i * sob_delta);
5816 u32 dma_offset = i * DMA_CORE_OFFSET;
5817
5818 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5819 lower_32_bits(sob_addr));
5820 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5821 upper_32_bits(sob_addr));
5822 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5823
5824 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5825 * modified by the user for SRAM reduction
5826 */
5827 if (i > 1)
5828 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5829 0x00000001);
5830 }
5831}
5832
5833static void gaudi_restore_qm_registers(struct hl_device *hdev)
5834{
5835 u32 qman_offset;
5836 int i;
5837
5838 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5839 qman_offset = i * DMA_QMAN_OFFSET;
5840 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5841 }
5842
5843 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5844 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5845 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5846 }
5847
5848 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5849 qman_offset = i * TPC_QMAN_OFFSET;
5850 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5851 }
Oded Gabbay3c681572020-11-02 21:10:39 +02005852
5853 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5854 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5855 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5856 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5857 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005858}
5859
Ofir Bitton423815b2021-01-05 09:04:07 +02005860static int gaudi_restore_user_registers(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005861{
Ofir Bitton423815b2021-01-05 09:04:07 +02005862 int rc;
5863
5864 rc = gaudi_restore_sm_registers(hdev);
5865 if (rc)
5866 return rc;
5867
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005868 gaudi_restore_dma_registers(hdev);
5869 gaudi_restore_qm_registers(hdev);
Ofir Bitton423815b2021-01-05 09:04:07 +02005870
5871 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005872}
5873
5874static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5875{
Ofir Bitton423815b2021-01-05 09:04:07 +02005876 return gaudi_restore_user_registers(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005877}
5878
5879static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5880{
5881 struct asic_fixed_properties *prop = &hdev->asic_prop;
5882 struct gaudi_device *gaudi = hdev->asic_specific;
5883 u64 addr = prop->mmu_pgt_addr;
5884 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
5885
5886 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5887 return 0;
5888
5889 return gaudi_memset_device_memory(hdev, addr, size, 0);
5890}
5891
5892static void gaudi_restore_phase_topology(struct hl_device *hdev)
5893{
5894
5895}
5896
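/*
 * debugfs access helpers: CFG registers go through RREG32/WREG32 and are
 * refused while clock gating is enabled, SRAM is accessed directly through
 * its PCI BAR, and HBM addresses are reached by temporarily moving the HBM
 * BAR window to the region containing the address and restoring it
 * afterwards.
 */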
5897static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
5898{
5899 struct asic_fixed_properties *prop = &hdev->asic_prop;
5900 struct gaudi_device *gaudi = hdev->asic_specific;
5901 u64 hbm_bar_addr;
5902 int rc = 0;
5903
5904 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005905
5906 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5907 (hdev->clock_gating_mask &
5908 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5909
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005910 dev_err_ratelimited(hdev->dev,
5911 "Can't read register - clock gating is enabled!\n");
5912 rc = -EFAULT;
5913 } else {
5914 *val = RREG32(addr - CFG_BASE);
5915 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005916
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005917 } else if ((addr >= SRAM_BASE_ADDR) &&
5918 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5919 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
5920 (addr - SRAM_BASE_ADDR));
5921 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5922 u64 bar_base_addr = DRAM_PHYS_BASE +
5923 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5924
5925 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5926 if (hbm_bar_addr != U64_MAX) {
5927 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
5928 (addr - bar_base_addr));
5929
5930 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5931 hbm_bar_addr);
5932 }
5933 if (hbm_bar_addr == U64_MAX)
5934 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005935 } else {
5936 rc = -EFAULT;
5937 }
5938
5939 return rc;
5940}
5941
5942static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
5943{
5944 struct asic_fixed_properties *prop = &hdev->asic_prop;
5945 struct gaudi_device *gaudi = hdev->asic_specific;
5946 u64 hbm_bar_addr;
5947 int rc = 0;
5948
5949 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005950
5951 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5952 (hdev->clock_gating_mask &
5953 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5954
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005955 dev_err_ratelimited(hdev->dev,
5956 "Can't write register - clock gating is enabled!\n");
5957 rc = -EFAULT;
5958 } else {
5959 WREG32(addr - CFG_BASE, val);
5960 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005961
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005962 } else if ((addr >= SRAM_BASE_ADDR) &&
5963 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5964 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
5965 (addr - SRAM_BASE_ADDR));
5966 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5967 u64 bar_base_addr = DRAM_PHYS_BASE +
5968 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5969
5970 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5971 if (hbm_bar_addr != U64_MAX) {
5972 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
5973 (addr - bar_base_addr));
5974
5975 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5976 hbm_bar_addr);
5977 }
5978 if (hbm_bar_addr == U64_MAX)
5979 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005980 } else {
5981 rc = -EFAULT;
5982 }
5983
5984 return rc;
5985}
5986
5987static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
5988{
5989 struct asic_fixed_properties *prop = &hdev->asic_prop;
5990 struct gaudi_device *gaudi = hdev->asic_specific;
5991 u64 hbm_bar_addr;
5992 int rc = 0;
5993
5994 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005995
5996 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5997 (hdev->clock_gating_mask &
5998 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5999
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006000 dev_err_ratelimited(hdev->dev,
6001 "Can't read register - clock gating is enabled!\n");
6002 rc = -EFAULT;
6003 } else {
6004 u32 val_l = RREG32(addr - CFG_BASE);
6005 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6006
6007 *val = (((u64) val_h) << 32) | val_l;
6008 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006009
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006010 } else if ((addr >= SRAM_BASE_ADDR) &&
6011 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6012 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6013 (addr - SRAM_BASE_ADDR));
6014 } else if (addr <=
6015 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6016 u64 bar_base_addr = DRAM_PHYS_BASE +
6017 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6018
6019 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6020 if (hbm_bar_addr != U64_MAX) {
6021 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6022 (addr - bar_base_addr));
6023
6024 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6025 hbm_bar_addr);
6026 }
6027 if (hbm_bar_addr == U64_MAX)
6028 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006029 } else {
6030 rc = -EFAULT;
6031 }
6032
6033 return rc;
6034}
6035
6036static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
6037{
6038 struct asic_fixed_properties *prop = &hdev->asic_prop;
6039 struct gaudi_device *gaudi = hdev->asic_specific;
6040 u64 hbm_bar_addr;
6041 int rc = 0;
6042
6043 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006044
6045 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6046 (hdev->clock_gating_mask &
6047 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6048
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006049 dev_err_ratelimited(hdev->dev,
6050 "Can't write register - clock gating is enabled!\n");
6051 rc = -EFAULT;
6052 } else {
6053 WREG32(addr - CFG_BASE, lower_32_bits(val));
6054 WREG32(addr + sizeof(u32) - CFG_BASE,
6055 upper_32_bits(val));
6056 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006057
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006058 } else if ((addr >= SRAM_BASE_ADDR) &&
6059 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6060 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6061 (addr - SRAM_BASE_ADDR));
6062 } else if (addr <=
6063 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6064 u64 bar_base_addr = DRAM_PHYS_BASE +
6065 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6066
6067 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6068 if (hbm_bar_addr != U64_MAX) {
6069 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6070 (addr - bar_base_addr));
6071
6072 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6073 hbm_bar_addr);
6074 }
6075 if (hbm_bar_addr == U64_MAX)
6076 rc = -EIO;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006077 } else {
6078 rc = -EFAULT;
6079 }
6080
6081 return rc;
6082}
6083
6084static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6085{
6086 struct gaudi_device *gaudi = hdev->asic_specific;
6087
6088 if (hdev->hard_reset_pending)
6089 return U64_MAX;
6090
6091 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6092 (addr - gaudi->hbm_bar_cur_addr));
6093}
6094
6095static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6096{
6097 struct gaudi_device *gaudi = hdev->asic_specific;
6098
6099 if (hdev->hard_reset_pending)
6100 return;
6101
6102 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6103 (addr - gaudi->hbm_bar_cur_addr));
6104}
6105
Ofir Bitton1137e1e2020-09-30 18:43:52 +03006106void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006107{
6108 /* mask to zero the MMBP and ASID bits */
6109 WREG32_AND(reg, ~0x7FF);
6110 WREG32_OR(reg, asid);
6111}
6112
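/*
 * Stamp the given ASID into the ARUSER/AWUSER/NON_SECURE_PROPS registers of
 * every DMA, TPC, MME and enabled NIC engine, so that transactions issued
 * by those engines are translated by the MMU under this context's ASID.
 * Clock gating is disabled while the registers are updated.
 */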
6113static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6114{
6115 struct gaudi_device *gaudi = hdev->asic_specific;
6116
6117 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6118 return;
6119
6120 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
Alon Mizrahi75d9a2a2020-12-03 17:32:19 +02006121 dev_crit(hdev->dev, "asid %u is too big\n", asid);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006122 return;
6123 }
6124
6125 mutex_lock(&gaudi->clk_gate_mutex);
6126
6127 hdev->asic_funcs->disable_clock_gating(hdev);
6128
6129 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6130 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6131 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6132 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6133 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6134
6135 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6136 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6137 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6138 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6139 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6140
6141 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6142 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6143 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6144 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6145 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6146
6147 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6148 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6149 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6150 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6151 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6152
6153 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6154 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6155 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6156 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6157 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6158
6159 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6160 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6161 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6162 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6163 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6164
6165 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6166 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6167 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6168 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6169 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6170
6171 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6172 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6173 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6174 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6175 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6176
6177 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6178 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6179 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6180 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6181 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6182 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6183 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6184 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6185
6186 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6187 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6188 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6189 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6190 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6191 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6192 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6193
6194 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6195 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6196 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6197 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6198 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6199 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6200 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6201
6202 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6203 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6204 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6205 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6206 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6207 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6208 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6209
6210 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6211 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6212 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6213 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6214 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6215 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6216 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6217
6218 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6219 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6220 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6221 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6222 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6223 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6224 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6225
6226 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6227 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6228 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6229 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6230 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6231 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6232 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6233
6234 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6235 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6236 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6237 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6238 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6239 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6240 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6241
6242 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6243 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6244 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6245 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6246 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6247 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6248 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6249
6250 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6251 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6252 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6253 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6254 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6255 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6256 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6257 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6258 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6259 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6260
6261 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6262 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6263 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6264 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6265 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6266 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6267 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6268 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6269 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6270 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6271 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6272 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6273
Oded Gabbay3c681572020-11-02 21:10:39 +02006274 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
6275 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6276 asid);
6277 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6278 asid);
6279 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6280 asid);
6281 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6282 asid);
6283 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6284 asid);
6285 }
6286
6287 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
6288 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6289 asid);
6290 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6291 asid);
6292 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6293 asid);
6294 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6295 asid);
6296 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6297 asid);
6298 }
6299
6300 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
6301 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6302 asid);
6303 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6304 asid);
6305 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6306 asid);
6307 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6308 asid);
6309 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6310 asid);
6311 }
6312
6313 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
6314 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6315 asid);
6316 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6317 asid);
6318 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6319 asid);
6320 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6321 asid);
6322 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6323 asid);
6324 }
6325
6326 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
6327 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6328 asid);
6329 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6330 asid);
6331 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6332 asid);
6333 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6334 asid);
6335 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6336 asid);
6337 }
6338
6339 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
6340 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6341 asid);
6342 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6343 asid);
6344 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6345 asid);
6346 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6347 asid);
6348 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6349 asid);
6350 }
6351
6352 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
6353 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6354 asid);
6355 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6356 asid);
6357 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6358 asid);
6359 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6360 asid);
6361 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6362 asid);
6363 }
6364
6365 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
6366 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6367 asid);
6368 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6369 asid);
6370 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6371 asid);
6372 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6373 asid);
6374 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6375 asid);
6376 }
6377
6378 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
6379 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6380 asid);
6381 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6382 asid);
6383 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6384 asid);
6385 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6386 asid);
6387 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6388 asid);
6389 }
6390
6391 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
6392 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6393 asid);
6394 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6395 asid);
6396 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6397 asid);
6398 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6399 asid);
6400 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6401 asid);
6402 }
6403
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006404 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006405
6406 mutex_unlock(&gaudi->clk_gate_mutex);
6407}
6408
6409static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6410 struct hl_cs_job *job)
6411{
6412 struct packet_msg_prot *fence_pkt;
6413 u32 *fence_ptr;
6414 dma_addr_t fence_dma_addr;
6415 struct hl_cb *cb;
6416 u32 tmp, timeout, dma_offset;
6417 int rc;
6418
6419 if (hdev->pldm)
6420 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6421 else
6422 timeout = HL_DEVICE_TIMEOUT_USEC;
6423
6424 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
6425 dev_err_ratelimited(hdev->dev,
6426 "Can't send driver job on QMAN0 because the device is not idle\n");
6427 return -EBUSY;
6428 }
6429
6430 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6431 &fence_dma_addr);
6432 if (!fence_ptr) {
6433 dev_err(hdev->dev,
6434 "Failed to allocate fence memory for QMAN0\n");
6435 return -ENOMEM;
6436 }
6437
6438 cb = job->patched_cb;
6439
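	/*
	 * Build the fence MSG_PROT packet in the last packet slot of the
	 * patched CB
	 */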
Arnd Bergmann82948e62020-10-26 17:08:06 +01006440 fence_pkt = cb->kernel_address +
6441 job->job_cb_size - sizeof(struct packet_msg_prot);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006442
Oded Gabbay65887292020-08-12 11:21:01 +03006443 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6444 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6445 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6446
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006447 fence_pkt->ctl = cpu_to_le32(tmp);
6448 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6449 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6450
6451 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6452
6453 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6454
6455 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6456 job->job_cb_size, cb->bus_address);
6457 if (rc) {
6458 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6459 goto free_fence_ptr;
6460 }
6461
6462 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6463 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6464 timeout, true);
6465
6466 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6467
6468 if (rc == -ETIMEDOUT) {
6469 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6470 goto free_fence_ptr;
6471 }
6472
6473free_fence_ptr:
6474 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6475 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6476
6477 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6478 fence_dma_addr);
6479 return rc;
6480}
6481
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006482static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6483{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006484 if (event_type >= GAUDI_EVENT_SIZE)
6485 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006486
Ofir Bittonebd8d122020-05-10 13:41:28 +03006487 if (!gaudi_irq_map_table[event_type].valid)
6488 goto event_not_supported;
6489
6490	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6491
6492 return;
6493
6494event_not_supported:
6495 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006496}
6497
6498static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6499 u32 x_y, bool is_write)
6500{
6501 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6502
6503 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6504 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6505
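	/*
	 * Each DMA_IF initiator ID is shared by two DMA cores; read both
	 * cores' ERR_CAUSE registers to tell which core triggered the RAZWI.
	 */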
6506 switch (x_y) {
6507 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6508 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6509 dma_id[0] = 0;
6510 dma_id[1] = 2;
6511 break;
6512 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6513 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6514 dma_id[0] = 1;
6515 dma_id[1] = 3;
6516 break;
6517 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6518 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6519 dma_id[0] = 4;
6520 dma_id[1] = 6;
6521 break;
6522 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6523 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6524 dma_id[0] = 5;
6525 dma_id[1] = 7;
6526 break;
6527 default:
6528 goto unknown_initiator;
6529 }
6530
6531 for (i = 0 ; i < 2 ; i++) {
6532 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6533 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6534 }
6535
6536 switch (x_y) {
6537 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6538 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6539 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6540 return "DMA0";
6541 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6542 return "DMA2";
6543 else
6544 return "DMA0 or DMA2";
6545 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6546 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6547 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6548 return "DMA1";
6549 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6550 return "DMA3";
6551 else
6552 return "DMA1 or DMA3";
6553 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6554 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6555 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6556 return "DMA4";
6557 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6558 return "DMA6";
6559 else
6560 return "DMA4 or DMA6";
6561 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6562 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6563 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6564 return "DMA5";
6565 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6566 return "DMA7";
6567 else
6568 return "DMA5 or DMA7";
6569 }
6570
6571unknown_initiator:
6572 return "unknown initiator";
6573}
6574
6575static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6576 bool is_write)
6577{
6578 u32 val, x_y, axi_id;
6579
6580 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6581 RREG32(mmMMU_UP_RAZWI_READ_ID);
6582 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6583 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6584 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6585 RAZWI_INITIATOR_AXI_ID_SHIFT);
6586
6587 switch (x_y) {
6588 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6589 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6590 return "TPC0";
6591 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6592 return "NIC0";
6593 break;
6594 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6595 return "TPC1";
6596 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6597 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6598 return "MME0";
6599 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6600 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6601 return "MME1";
6602 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6603 return "TPC2";
6604 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6605 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6606 return "TPC3";
6607 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6608 return "PCI";
6609 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6610 return "CPU";
6611 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6612 return "PSOC";
6613 break;
6614 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6615 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6616 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6617 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6618 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6619 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6620 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6621 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6622 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6623 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6624 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6625 return "TPC4";
6626 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6627 return "NIC1";
6628 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6629 return "NIC2";
6630 break;
6631 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6632 return "TPC5";
6633 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6634 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6635 return "MME2";
6636 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6637 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6638 return "MME3";
6639 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6640 return "TPC6";
6641 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6642 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6643 return "TPC7";
6644 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6645 return "NIC4";
6646 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6647 return "NIC5";
6648 break;
6649 default:
6650 break;
6651 }
6652
6653 dev_err(hdev->dev,
6654 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6655 val,
6656 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6657 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6658 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6659 RAZWI_INITIATOR_AXI_ID_MASK);
6660
6661 return "unknown initiator";
6662}
6663
6664static void gaudi_print_razwi_info(struct hl_device *hdev)
6665{
6666 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6667 dev_err_ratelimited(hdev->dev,
6668 "RAZWI event caused by illegal write of %s\n",
6669 gaudi_get_razwi_initiator_name(hdev, true));
6670 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6671 }
6672
6673 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6674 dev_err_ratelimited(hdev->dev,
6675 "RAZWI event caused by illegal read of %s\n",
6676 gaudi_get_razwi_initiator_name(hdev, false));
6677 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6678 }
6679}
6680
6681static void gaudi_print_mmu_error_info(struct hl_device *hdev)
6682{
6683 struct gaudi_device *gaudi = hdev->asic_specific;
6684 u64 addr;
6685 u32 val;
6686
6687 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6688 return;
6689
6690 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6691 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6692 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6693 addr <<= 32;
6694 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6695
6696 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
6697 addr);
6698
6699 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6700 }
6701
6702 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6703 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6704 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6705 addr <<= 32;
6706 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6707
6708 dev_err_ratelimited(hdev->dev,
6709 "MMU access error on va 0x%llx\n", addr);
6710
6711 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6712 }
6713}
6714
6715/*
6716 * +-------------------+------------------------------------------------------+
6717 * | Configuration Reg | Description |
6718 * | Address | |
6719 * +-------------------+------------------------------------------------------+
6720 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6721 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6722 * | |0xF34 memory wrappers 63:32 |
6723 * | |0xF38 memory wrappers 95:64 |
6724 * | |0xF3C memory wrappers 127:96 |
6725 * +-------------------+------------------------------------------------------+
6726 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6727 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6728 * | |0xF44 memory wrappers 63:32 |
6729 * | |0xF48 memory wrappers 95:64 |
6730 * | |0xF4C memory wrappers 127:96 |
6731 * +-------------------+------------------------------------------------------+
6732 */
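/*
 * Worked example (illustrative only): memory wrapper 70 with a double error
 * maps to the third register of the DERR group, offset 0xF48, bit
 * 70 % 32 = 6. A direct mapping, assuming the SERR0/DERR0 offsets used
 * below, would be:
 *
 *	reg = block_address + (derr ? GAUDI_ECC_DERR0_OFFSET :
 *			GAUDI_ECC_SERR0_OFFSET) + (wrapper_idx / 32) * 4;
 *	bit = wrapper_idx % 32;
 *
 * gaudi_extract_ecc_info() below performs the inverse: it scans these
 * registers and converts the first set bit back to a wrapper index.
 */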
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006733static int gaudi_extract_ecc_info(struct hl_device *hdev,
6734 struct ecc_info_extract_params *params, u64 *ecc_address,
6735 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006736{
6737 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006738 u32 i, num_mem_regs, reg, err_bit;
6739 u64 err_addr, err_word = 0;
6740 int rc = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006741
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006742 num_mem_regs = params->num_memories / 32 +
6743 ((params->num_memories % 32) ? 1 : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006744
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006745 if (params->block_address >= CFG_BASE)
6746 params->block_address -= CFG_BASE;
6747
6748 if (params->derr)
6749 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006750 else
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006751 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006752
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006753 if (params->disable_clock_gating) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006754 mutex_lock(&gaudi->clk_gate_mutex);
6755 hdev->asic_funcs->disable_clock_gating(hdev);
6756 }
6757
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006758 /* Set invalid wrapper index */
6759 *memory_wrapper_idx = 0xFF;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006760
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006761 /* Iterate through memory wrappers, a single bit must be set */
Dan Carpenterb0353542020-08-05 12:51:05 +03006762 for (i = 0 ; i < num_mem_regs ; i++) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006763		/* Indication registers are laid out with a 4-byte stride */
6764		err_word = RREG32(err_addr + i * 4);
6765 if (err_word) {
6766 err_bit = __ffs(err_word);
6767 *memory_wrapper_idx = err_bit + (32 * i);
6768 break;
6769 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006770 }
6771
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006772 if (*memory_wrapper_idx == 0xFF) {
6773 dev_err(hdev->dev, "ECC error information cannot be found\n");
6774 rc = -EINVAL;
6775 goto enable_clk_gate;
6776 }
6777
6778 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6779 *memory_wrapper_idx);
6780
6781 *ecc_address =
6782 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6783 *ecc_syndrom =
6784 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6785
6786 /* Clear error indication */
6787 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6788 if (params->derr)
6789 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6790 else
6791 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6792
6793 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6794
6795enable_clk_gate:
6796 if (params->disable_clock_gating) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006797 hdev->asic_funcs->set_clock_gating(hdev);
Greg Kroah-Hartman65a9bde62020-07-27 11:49:37 +02006798
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006799 mutex_unlock(&gaudi->clk_gate_mutex);
6800 }
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006801
6802 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006803}
6804
6805static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6806 const char *qm_name,
6807 u64 glbl_sts_addr,
6808 u64 arb_err_addr)
6809{
6810 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6811 char reg_desc[32];
6812
6813 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6814 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6815 glbl_sts_clr_val = 0;
6816 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6817
6818 if (!glbl_sts_val)
6819 continue;
6820
6821 if (i == QMAN_STREAMS)
6822 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6823 else
6824 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6825
6826 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6827 if (glbl_sts_val & BIT(j)) {
6828 dev_err_ratelimited(hdev->dev,
6829 "%s %s. err cause: %s\n",
6830 qm_name, reg_desc,
6831 gaudi_qman_error_cause[j]);
6832 glbl_sts_clr_val |= BIT(j);
6833 }
6834 }
6835
6836		/* Write 1 to clear errors */
6837 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6838 }
6839
6840 arb_err_val = RREG32(arb_err_addr);
6841
6842 if (!arb_err_val)
6843 return;
6844
6845 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6846 if (arb_err_val & BIT(j)) {
6847 dev_err_ratelimited(hdev->dev,
6848 "%s ARB_ERR. err cause: %s\n",
6849 qm_name,
6850 gaudi_qman_arb_error_cause[j]);
6851 }
6852 }
6853}
6854
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02006855static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
6856 struct hl_eq_sm_sei_data *sei_data)
6857{
6858 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
6859
6860 switch (sei_data->sei_cause) {
6861 case GAUDI_SM_SEI_SO_OVERFLOW:
6862 dev_err(hdev->dev,
6863 "SM %u SEI Error: SO %u overflow/underflow",
6864 index, le16_to_cpu(sei_data->sei_log));
6865 break;
6866 case GAUDI_SM_SEI_LBW_4B_UNALIGNED:
6867 dev_err(hdev->dev,
6868 "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
6869 index, le16_to_cpu(sei_data->sei_log));
6870 break;
6871 case GAUDI_SM_SEI_AXI_RESPONSE_ERR:
6872 dev_err(hdev->dev,
6873 "SM %u SEI Error: AXI ID %u response error",
6874 index, le16_to_cpu(sei_data->sei_log));
6875 break;
6876 default:
6877 dev_err(hdev->dev, "Unknown SM SEI cause %u",
6878			sei_data->sei_cause);
6879 break;
6880 }
6881}
6882
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006883static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6884 struct hl_eq_ecc_data *ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006885{
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006886 struct ecc_info_extract_params params;
6887 u64 ecc_address = 0, ecc_syndrom = 0;
6888 u8 index, memory_wrapper_idx = 0;
6889 bool extract_info_from_fw;
6890 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006891
6892 switch (event_type) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006893 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
6894 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
6895 extract_info_from_fw = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006896 break;
6897 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
6898 index = event_type - GAUDI_EVENT_TPC0_SERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006899 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
6900 params.num_memories = 90;
6901 params.derr = false;
6902 params.disable_clock_gating = true;
6903 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006904 break;
6905 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
6906 index = event_type - GAUDI_EVENT_TPC0_DERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006907 params.block_address =
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006908 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006909 params.num_memories = 90;
6910 params.derr = true;
6911 params.disable_clock_gating = true;
6912 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006913 break;
6914 case GAUDI_EVENT_MME0_ACC_SERR:
6915 case GAUDI_EVENT_MME1_ACC_SERR:
6916 case GAUDI_EVENT_MME2_ACC_SERR:
6917 case GAUDI_EVENT_MME3_ACC_SERR:
6918 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006919 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6920 params.num_memories = 128;
6921 params.derr = false;
6922 params.disable_clock_gating = true;
6923 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006924 break;
6925 case GAUDI_EVENT_MME0_ACC_DERR:
6926 case GAUDI_EVENT_MME1_ACC_DERR:
6927 case GAUDI_EVENT_MME2_ACC_DERR:
6928 case GAUDI_EVENT_MME3_ACC_DERR:
6929 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006930 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6931 params.num_memories = 128;
6932 params.derr = true;
6933 params.disable_clock_gating = true;
6934 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006935 break;
6936 case GAUDI_EVENT_MME0_SBAB_SERR:
6937 case GAUDI_EVENT_MME1_SBAB_SERR:
6938 case GAUDI_EVENT_MME2_SBAB_SERR:
6939 case GAUDI_EVENT_MME3_SBAB_SERR:
6940 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006941 params.block_address =
6942 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6943 params.num_memories = 33;
6944 params.derr = false;
6945 params.disable_clock_gating = true;
6946 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006947 break;
6948 case GAUDI_EVENT_MME0_SBAB_DERR:
6949 case GAUDI_EVENT_MME1_SBAB_DERR:
6950 case GAUDI_EVENT_MME2_SBAB_DERR:
6951 case GAUDI_EVENT_MME3_SBAB_DERR:
6952 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006953 params.block_address =
6954 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6955 params.num_memories = 33;
6956 params.derr = true;
6957 params.disable_clock_gating = true;
Oded Gabbay652b4442020-11-21 14:35:35 +02006958 extract_info_from_fw = false;
6959 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006960 default:
6961 return;
6962 }
6963
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006964 if (extract_info_from_fw) {
6965 ecc_address = le64_to_cpu(ecc_data->ecc_address);
6966 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6967 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6968 } else {
6969 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
6970 &ecc_syndrom, &memory_wrapper_idx);
6971 if (rc)
6972 return;
6973 }
6974
6975 dev_err(hdev->dev,
6976		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
6977 ecc_address, ecc_syndrom, memory_wrapper_idx);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006978}
6979
6980static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
6981{
6982 u64 glbl_sts_addr, arb_err_addr;
6983 u8 index;
6984 char desc[32];
6985
6986 switch (event_type) {
6987 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
6988 index = event_type - GAUDI_EVENT_TPC0_QM;
6989 glbl_sts_addr =
6990 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
6991 arb_err_addr =
6992 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
6993 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
6994 break;
6995 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
6996 index = event_type - GAUDI_EVENT_MME0_QM;
6997 glbl_sts_addr =
6998 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
6999 arb_err_addr =
7000 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
7001 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7002 break;
7003 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7004 index = event_type - GAUDI_EVENT_DMA0_QM;
7005 glbl_sts_addr =
7006 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
7007 arb_err_addr =
7008 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
7009 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7010 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02007011 case GAUDI_EVENT_NIC0_QM0:
7012 glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
7013 arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
7014 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7015 break;
7016 case GAUDI_EVENT_NIC0_QM1:
7017 glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
7018 arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
7019 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7020 break;
7021 case GAUDI_EVENT_NIC1_QM0:
7022 glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
7023 arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
7024 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7025 break;
7026 case GAUDI_EVENT_NIC1_QM1:
7027 glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
7028 arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
7029 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7030 break;
7031 case GAUDI_EVENT_NIC2_QM0:
7032 glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
7033 arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
7034 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7035 break;
7036 case GAUDI_EVENT_NIC2_QM1:
7037 glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
7038 arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
7039 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7040 break;
7041 case GAUDI_EVENT_NIC3_QM0:
7042 glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
7043 arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
7044 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7045 break;
7046 case GAUDI_EVENT_NIC3_QM1:
7047 glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
7048 arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
7049 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7050 break;
7051 case GAUDI_EVENT_NIC4_QM0:
7052 glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
7053 arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
7054 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7055 break;
7056 case GAUDI_EVENT_NIC4_QM1:
7057 glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
7058 arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
7059 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7060 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007061 default:
7062 return;
7063 }
7064
7065 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
7066}
7067
7068static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7069 bool razwi)
7070{
Ofir Bittonebd8d122020-05-10 13:41:28 +03007071 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007072
7073 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7074 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7075 event_type, desc);
7076
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007077 if (razwi) {
7078 gaudi_print_razwi_info(hdev);
7079 gaudi_print_mmu_error_info(hdev);
7080 }
7081}
7082
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007083static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7084{
Ofir Bittonebd8d122020-05-10 13:41:28 +03007085 struct gaudi_device *gaudi = hdev->asic_specific;
7086
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007087 /* Unmask all IRQs since some could have been received
7088 * during the soft reset
7089 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03007090 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007091}
7092
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007093static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7094 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007095{
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007096 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7097 int err = 0;
7098
7099 if (!hdev->asic_prop.fw_security_disabled) {
7100 if (!hbm_ecc_data) {
7101 dev_err(hdev->dev, "No FW ECC data");
7102 return 0;
7103 }
7104
7105 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7106 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7107 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7108 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7109 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7110 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7111 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7112 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7113 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7114 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7115 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7116 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7117 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7118 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7119
7120 dev_err(hdev->dev,
Oded Gabbay64a9d5a2020-11-21 14:29:25 +02007121 "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7122 device, ch, type, wr_par, rd_par, ca_par, serr, derr);
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007123
7124 err = 1;
7125
7126 return 0;
7127 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007128
7129 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7130 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7131 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7132 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7133 if (val) {
7134 err = 1;
7135 dev_err(hdev->dev,
7136 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7137 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7138 (val >> 2) & 0x1, (val >> 3) & 0x1,
7139 (val >> 4) & 0x1);
7140
7141 val2 = RREG32(base + ch * 0x1000 + 0x060);
7142 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007143 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007144 device, ch * 2,
7145 RREG32(base + ch * 0x1000 + 0x064),
7146 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7147 (val2 & 0xFF0000) >> 16,
7148 (val2 & 0xFF000000) >> 24);
7149 }
7150
7151 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7152 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7153 if (val) {
7154 err = 1;
7155 dev_err(hdev->dev,
7156 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7157 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7158 (val >> 2) & 0x1, (val >> 3) & 0x1,
7159 (val >> 4) & 0x1);
7160
7161 val2 = RREG32(base + ch * 0x1000 + 0x070);
7162 dev_err(hdev->dev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007163 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007164 device, ch * 2 + 1,
7165 RREG32(base + ch * 0x1000 + 0x074),
7166 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7167 (val2 & 0xFF0000) >> 16,
7168 (val2 & 0xFF000000) >> 24);
7169 }
7170
7171 /* Clear interrupts */
7172 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7173 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7174 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7175 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7176 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7177 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7178 }
7179
7180 val = RREG32(base + 0x8F30);
7181 val2 = RREG32(base + 0x8F34);
7182 if (val | val2) {
7183 err = 1;
7184 dev_err(hdev->dev,
7185 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7186 device, val, val2);
7187 }
7188 val = RREG32(base + 0x8F40);
7189 val2 = RREG32(base + 0x8F44);
7190 if (val | val2) {
7191 err = 1;
7192 dev_err(hdev->dev,
7193 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7194 device, val, val2);
7195 }
7196
7197 return err;
7198}
7199
7200static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7201{
7202 switch (hbm_event_type) {
7203 case GAUDI_EVENT_HBM0_SPI_0:
7204 case GAUDI_EVENT_HBM0_SPI_1:
7205 return 0;
7206 case GAUDI_EVENT_HBM1_SPI_0:
7207 case GAUDI_EVENT_HBM1_SPI_1:
7208 return 1;
7209 case GAUDI_EVENT_HBM2_SPI_0:
7210 case GAUDI_EVENT_HBM2_SPI_1:
7211 return 2;
7212 case GAUDI_EVENT_HBM3_SPI_0:
7213 case GAUDI_EVENT_HBM3_SPI_1:
7214 return 3;
7215 default:
7216 break;
7217 }
7218
7219 /* Should never happen */
7220 return 0;
7221}
7222
7223static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7224 char *interrupt_name)
7225{
7226 struct gaudi_device *gaudi = hdev->asic_specific;
7227 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7228 bool soft_reset_required = false;
7229
7230 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
Oded Gabbay6138bbe2020-09-04 20:18:16 +03007231	 * gating, and thus cannot be done by CPU-CP; the driver does it
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007232	 * instead.
7233 */
7234
7235 mutex_lock(&gaudi->clk_gate_mutex);
7236
7237 hdev->asic_funcs->disable_clock_gating(hdev);
7238
7239 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7240 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7241
7242 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7243 if (tpc_interrupts_cause & BIT(i)) {
7244 dev_err_ratelimited(hdev->dev,
7245 "TPC%d_%s interrupt cause: %s\n",
7246 tpc_id, interrupt_name,
7247 gaudi_tpc_interrupts_cause[i]);
7248 /* If this is QM error, we need to soft-reset */
7249 if (i == 15)
7250 soft_reset_required = true;
7251 }
7252
7253 /* Clear interrupts */
7254 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7255
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007256 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007257
7258 mutex_unlock(&gaudi->clk_gate_mutex);
7259
7260 return soft_reset_required;
7261}
7262
7263static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7264{
7265 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7266}
7267
7268static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7269{
7270 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7271}
7272
7273static void gaudi_print_clk_change_info(struct hl_device *hdev,
7274 u16 event_type)
7275{
7276 switch (event_type) {
7277 case GAUDI_EVENT_FIX_POWER_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007278 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007279 dev_info_ratelimited(hdev->dev,
7280 "Clock throttling due to power consumption\n");
7281 break;
7282
7283 case GAUDI_EVENT_FIX_POWER_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007284 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007285 dev_info_ratelimited(hdev->dev,
7286			"Power envelope is safe, back to optimal clock\n");
7287 break;
7288
7289 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007290 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007291 dev_info_ratelimited(hdev->dev,
7292 "Clock throttling due to overheating\n");
7293 break;
7294
7295 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03007296 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007297 dev_info_ratelimited(hdev->dev,
7298			"Thermal envelope is safe, back to optimal clock\n");
7299 break;
7300
7301 default:
7302 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7303 event_type);
7304 break;
7305 }
7306}
7307
7308static void gaudi_handle_eqe(struct hl_device *hdev,
7309 struct hl_eq_entry *eq_entry)
7310{
7311 struct gaudi_device *gaudi = hdev->asic_specific;
7312 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7313 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7314 >> EQ_CTL_EVENT_TYPE_SHIFT);
7315 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03007316 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007317
7318 gaudi->events_stat[event_type]++;
7319 gaudi->events_stat_aggregate[event_type]++;
7320
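	/*
	 * Fatal errors (e.g. double-bit ECC) escalate to a hard reset when
	 * hard_reset_on_fw_events is set; recoverable errors are logged and
	 * re-armed via hl_fw_unmask_irq().
	 */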
7321 switch (event_type) {
7322 case GAUDI_EVENT_PCIE_CORE_DERR:
7323 case GAUDI_EVENT_PCIE_IF_DERR:
7324 case GAUDI_EVENT_PCIE_PHY_DERR:
7325 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7326 case GAUDI_EVENT_MME0_ACC_DERR:
7327 case GAUDI_EVENT_MME0_SBAB_DERR:
7328 case GAUDI_EVENT_MME1_ACC_DERR:
7329 case GAUDI_EVENT_MME1_SBAB_DERR:
7330 case GAUDI_EVENT_MME2_ACC_DERR:
7331 case GAUDI_EVENT_MME2_SBAB_DERR:
7332 case GAUDI_EVENT_MME3_ACC_DERR:
7333 case GAUDI_EVENT_MME3_SBAB_DERR:
7334 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7335 fallthrough;
7336 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7337 case GAUDI_EVENT_PSOC_MEM_DERR:
7338 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7339 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7340 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007341 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7342 case GAUDI_EVENT_MMU_DERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007343 gaudi_print_irq_info(hdev, event_type, true);
7344 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7345 if (hdev->hard_reset_on_fw_events)
7346 hl_device_reset(hdev, true, false);
7347 break;
7348
7349 case GAUDI_EVENT_GIC500:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007350 case GAUDI_EVENT_AXI_ECC:
7351 case GAUDI_EVENT_L2_RAM_ECC:
7352 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7353 gaudi_print_irq_info(hdev, event_type, false);
7354 if (hdev->hard_reset_on_fw_events)
7355 hl_device_reset(hdev, true, false);
7356 break;
7357
7358 case GAUDI_EVENT_HBM0_SPI_0:
7359 case GAUDI_EVENT_HBM1_SPI_0:
7360 case GAUDI_EVENT_HBM2_SPI_0:
7361 case GAUDI_EVENT_HBM3_SPI_0:
7362 gaudi_print_irq_info(hdev, event_type, false);
7363 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007364 gaudi_hbm_event_to_dev(event_type),
7365 &eq_entry->hbm_ecc_data);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007366 if (hdev->hard_reset_on_fw_events)
7367 hl_device_reset(hdev, true, false);
7368 break;
7369
7370 case GAUDI_EVENT_HBM0_SPI_1:
7371 case GAUDI_EVENT_HBM1_SPI_1:
7372 case GAUDI_EVENT_HBM2_SPI_1:
7373 case GAUDI_EVENT_HBM3_SPI_1:
7374 gaudi_print_irq_info(hdev, event_type, false);
7375 gaudi_hbm_read_interrupts(hdev,
Ofir Bitton5a2998f2020-10-05 13:44:59 +03007376 gaudi_hbm_event_to_dev(event_type),
7377 &eq_entry->hbm_ecc_data);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007378 break;
7379
7380 case GAUDI_EVENT_TPC0_DEC:
7381 case GAUDI_EVENT_TPC1_DEC:
7382 case GAUDI_EVENT_TPC2_DEC:
7383 case GAUDI_EVENT_TPC3_DEC:
7384 case GAUDI_EVENT_TPC4_DEC:
7385 case GAUDI_EVENT_TPC5_DEC:
7386 case GAUDI_EVENT_TPC6_DEC:
7387 case GAUDI_EVENT_TPC7_DEC:
7388 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007389 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007390 tpc_dec_event_to_tpc_id(event_type),
7391 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03007392 if (reset_required) {
7393 dev_err(hdev->dev, "hard reset required due to %s\n",
7394 gaudi_irq_map_table[event_type].name);
7395
7396 if (hdev->hard_reset_on_fw_events)
7397 hl_device_reset(hdev, true, false);
7398 } else {
7399 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03007400 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007401 break;
7402
7403 case GAUDI_EVENT_TPC0_KRN_ERR:
7404 case GAUDI_EVENT_TPC1_KRN_ERR:
7405 case GAUDI_EVENT_TPC2_KRN_ERR:
7406 case GAUDI_EVENT_TPC3_KRN_ERR:
7407 case GAUDI_EVENT_TPC4_KRN_ERR:
7408 case GAUDI_EVENT_TPC5_KRN_ERR:
7409 case GAUDI_EVENT_TPC6_KRN_ERR:
7410 case GAUDI_EVENT_TPC7_KRN_ERR:
7411 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007412 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007413 tpc_krn_event_to_tpc_id(event_type),
7414 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03007415 if (reset_required) {
7416 dev_err(hdev->dev, "hard reset required due to %s\n",
7417 gaudi_irq_map_table[event_type].name);
7418
7419 if (hdev->hard_reset_on_fw_events)
7420 hl_device_reset(hdev, true, false);
7421 } else {
7422 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03007423 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007424 break;
7425
7426 case GAUDI_EVENT_PCIE_CORE_SERR:
7427 case GAUDI_EVENT_PCIE_IF_SERR:
7428 case GAUDI_EVENT_PCIE_PHY_SERR:
7429 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7430 case GAUDI_EVENT_MME0_ACC_SERR:
7431 case GAUDI_EVENT_MME0_SBAB_SERR:
7432 case GAUDI_EVENT_MME1_ACC_SERR:
7433 case GAUDI_EVENT_MME1_SBAB_SERR:
7434 case GAUDI_EVENT_MME2_ACC_SERR:
7435 case GAUDI_EVENT_MME2_SBAB_SERR:
7436 case GAUDI_EVENT_MME3_ACC_SERR:
7437 case GAUDI_EVENT_MME3_SBAB_SERR:
7438 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7439 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7440 case GAUDI_EVENT_PSOC_MEM_SERR:
7441 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7442 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7443 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7444 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7445 fallthrough;
7446 case GAUDI_EVENT_MMU_SERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03007447 gaudi_print_irq_info(hdev, event_type, true);
7448 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7449 hl_fw_unmask_irq(hdev, event_type);
7450 break;
7451
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007452 case GAUDI_EVENT_PCIE_DEC:
7453 case GAUDI_EVENT_MME0_WBC_RSP:
7454 case GAUDI_EVENT_MME0_SBAB0_RSP:
7455 case GAUDI_EVENT_MME1_WBC_RSP:
7456 case GAUDI_EVENT_MME1_SBAB0_RSP:
7457 case GAUDI_EVENT_MME2_WBC_RSP:
7458 case GAUDI_EVENT_MME2_SBAB0_RSP:
7459 case GAUDI_EVENT_MME3_WBC_RSP:
7460 case GAUDI_EVENT_MME3_SBAB0_RSP:
7461 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7462 case GAUDI_EVENT_PSOC_AXI_DEC:
7463 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7464 case GAUDI_EVENT_MMU_PAGE_FAULT:
7465 case GAUDI_EVENT_MMU_WR_PERM:
7466 case GAUDI_EVENT_RAZWI_OR_ADC:
7467 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7468 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7469 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7470 fallthrough;
Oded Gabbay3c681572020-11-02 21:10:39 +02007471 case GAUDI_EVENT_NIC0_QM0:
7472 case GAUDI_EVENT_NIC0_QM1:
7473 case GAUDI_EVENT_NIC1_QM0:
7474 case GAUDI_EVENT_NIC1_QM1:
7475 case GAUDI_EVENT_NIC2_QM0:
7476 case GAUDI_EVENT_NIC2_QM1:
7477 case GAUDI_EVENT_NIC3_QM0:
7478 case GAUDI_EVENT_NIC3_QM1:
7479 case GAUDI_EVENT_NIC4_QM0:
7480 case GAUDI_EVENT_NIC4_QM1:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007481 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7482 gaudi_print_irq_info(hdev, event_type, true);
7483 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007484 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007485 break;
7486
7487 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7488 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007489 if (hdev->hard_reset_on_fw_events)
7490 hl_device_reset(hdev, true, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007491 break;
7492
7493 case GAUDI_EVENT_TPC0_BMON_SPMU:
7494 case GAUDI_EVENT_TPC1_BMON_SPMU:
7495 case GAUDI_EVENT_TPC2_BMON_SPMU:
7496 case GAUDI_EVENT_TPC3_BMON_SPMU:
7497 case GAUDI_EVENT_TPC4_BMON_SPMU:
7498 case GAUDI_EVENT_TPC5_BMON_SPMU:
7499 case GAUDI_EVENT_TPC6_BMON_SPMU:
7500 case GAUDI_EVENT_TPC7_BMON_SPMU:
7501 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7502 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007503 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007504 break;
7505
Ofir Bittonf8bc7f02021-01-03 20:52:40 +02007506 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7507 gaudi_print_irq_info(hdev, event_type, false);
7508 gaudi_print_sm_sei_info(hdev, event_type,
7509 &eq_entry->sm_sei_data);
7510 hl_fw_unmask_irq(hdev, event_type);
7511 break;
7512
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007513 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7514 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007515 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007516 break;
7517
7518 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7519 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7520 dev_err(hdev->dev,
7521 "Received high temp H/W interrupt %d (cause %d)\n",
7522 event_type, cause);
7523 break;
7524
7525 default:
7526 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7527 event_type);
7528 break;
7529 }
7530}
7531
7532static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
7533 u32 *size)
7534{
7535 struct gaudi_device *gaudi = hdev->asic_specific;
7536
7537 if (aggregate) {
7538 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7539 return gaudi->events_stat_aggregate;
7540 }
7541
7542 *size = (u32) sizeof(gaudi->events_stat);
7543 return gaudi->events_stat;
7544}
7545
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007546static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007547 u32 flags)
7548{
7549 struct gaudi_device *gaudi = hdev->asic_specific;
7550 u32 status, timeout_usec;
7551 int rc;
7552
7553 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7554 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007555 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007556
7557 if (hdev->pldm)
7558 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7559 else
7560 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7561
7562 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03007563 WREG32(mmSTLB_INV_PS, 3);
7564 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007565 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007566
7567 rc = hl_poll_timeout(
7568 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007569 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007570 status,
7571 !status,
7572 1000,
7573 timeout_usec);
7574
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007575 WREG32(mmSTLB_INV_SET, 0);
7576
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007577 if (rc) {
7578 dev_err_ratelimited(hdev->dev,
7579 "MMU cache invalidation timeout\n");
7580 hl_device_reset(hdev, true, false);
7581 }
7582
7583 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007584}
7585
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007586static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007587 bool is_hard, u32 asid, u64 va, u64 size)
7588{
7589 struct gaudi_device *gaudi = hdev->asic_specific;
7590 u32 status, timeout_usec;
7591 u32 inv_data;
7592 u32 pi;
7593 int rc;
7594
7595 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7596 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007597 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007598
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007599 if (hdev->pldm)
7600 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7601 else
7602 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7603
7604 /*
7605 * TODO: currently invalidate entire L0 & L1 as in regular hard
7606 * invalidation. Need to apply invalidation of specific cache
7607 * lines with mask of ASID & VA & size.
7608	 * Note that L1 will be flushed entirely in any case.
7609 */
7610
7611 /* L0 & L1 invalidation */
7612 inv_data = RREG32(mmSTLB_CACHE_INV);
7613 /* PI is 8 bit */
7614 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
7615 WREG32(mmSTLB_CACHE_INV,
7616 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
7617
7618 rc = hl_poll_timeout(
7619 hdev,
7620 mmSTLB_INV_CONSUMER_INDEX,
7621 status,
7622 status == pi,
7623 1000,
7624 timeout_usec);
7625
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007626 if (rc) {
7627 dev_err_ratelimited(hdev->dev,
7628 "MMU cache invalidation timeout\n");
7629 hl_device_reset(hdev, true, false);
7630 }
7631
7632 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007633}
7634
7635static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
7636 u32 asid, u64 phys_addr)
7637{
7638 u32 status, timeout_usec;
7639 int rc;
7640
7641 if (hdev->pldm)
7642 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7643 else
7644 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7645
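	/*
	 * Program the ASID and its hop0 physical address (split across the
	 * 43:12 and 49:44 fields), then set the MMU busy bit and wait for
	 * the configuration to take effect.
	 */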
7646 WREG32(MMU_ASID, asid);
7647 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7648 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7649 WREG32(MMU_BUSY, 0x80000000);
7650
7651 rc = hl_poll_timeout(
7652 hdev,
7653 MMU_BUSY,
7654 status,
7655 !(status & 0x80000000),
7656 1000,
7657 timeout_usec);
7658
7659 if (rc) {
7660 dev_err(hdev->dev,
7661 "Timeout during MMU hop0 config of asid %d\n", asid);
7662 return rc;
7663 }
7664
7665 return 0;
7666}
7667
7668static int gaudi_send_heartbeat(struct hl_device *hdev)
7669{
7670 struct gaudi_device *gaudi = hdev->asic_specific;
7671
7672 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7673 return 0;
7674
7675 return hl_fw_send_heartbeat(hdev);
7676}
7677
Oded Gabbay2f553422020-08-15 16:28:10 +03007678static int gaudi_cpucp_info_get(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007679{
7680 struct gaudi_device *gaudi = hdev->asic_specific;
7681 struct asic_fixed_properties *prop = &hdev->asic_prop;
7682 int rc;
7683
7684 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7685 return 0;
7686
Ofir Bittonedb07cb2020-12-27 17:09:09 +02007687 rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007688 if (rc)
7689 return rc;
7690
Oded Gabbay2f553422020-08-15 16:28:10 +03007691 if (!strlen(prop->cpucp_info.card_name))
7692 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007693 CARD_NAME_MAX_LEN);
7694
Oded Gabbay2f553422020-08-15 16:28:10 +03007695 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
Oded Gabbay58361aa2020-08-08 23:34:47 +03007696
Oded Gabbay2f553422020-08-15 16:28:10 +03007697 if (hdev->card_type == cpucp_card_type_pci)
Oded Gabbay58361aa2020-08-08 23:34:47 +03007698 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
Oded Gabbay2f553422020-08-15 16:28:10 +03007699 else if (hdev->card_type == cpucp_card_type_pmc)
Oded Gabbay58361aa2020-08-08 23:34:47 +03007700 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
7701
7702 hdev->max_power = prop->max_power_default;
7703
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007704 return 0;
7705}
7706
farah kassabrid90416c2020-08-12 17:20:13 +03007707static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007708 struct seq_file *s)
7709{
7710 struct gaudi_device *gaudi = hdev->asic_specific;
7711 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
7712 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
Oded Gabbay3c681572020-11-02 21:10:39 +02007713 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007714 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
7715 bool is_idle = true, is_eng_idle, is_slave;
7716 u64 offset;
Oded Gabbay3c681572020-11-02 21:10:39 +02007717 int i, dma_id, port;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007718
7719 mutex_lock(&gaudi->clk_gate_mutex);
7720
7721 hdev->asic_funcs->disable_clock_gating(hdev);
7722
7723 if (s)
7724 seq_puts(s,
7725 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
7726 "--- ------- ------------ ---------- -------------\n");
7727
7728 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
7729 dma_id = gaudi_dma_assignment[i];
7730 offset = dma_id * DMA_QMAN_OFFSET;
7731
7732 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
7733 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
7734 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
7735 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7736 IS_DMA_IDLE(dma_core_sts0);
7737 is_idle &= is_eng_idle;
7738
7739 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03007740 *mask |= ((u64) !is_eng_idle) <<
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007741 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
7742 if (s)
7743 seq_printf(s, fmt, dma_id,
7744 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
7745 qm_cgm_sts, dma_core_sts0);
7746 }
7747
7748 if (s)
7749 seq_puts(s,
7750 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
7751 "--- ------- ------------ ---------- ----------\n");
7752
7753 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
7754 offset = i * TPC_QMAN_OFFSET;
7755 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
7756 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
7757 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
7758 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7759 IS_TPC_IDLE(tpc_cfg_sts);
7760 is_idle &= is_eng_idle;
7761
7762 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03007763 *mask |= ((u64) !is_eng_idle) <<
7764 (GAUDI_ENGINE_ID_TPC_0 + i);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007765 if (s)
7766 seq_printf(s, fmt, i,
7767 is_eng_idle ? "Y" : "N",
7768 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7769 }
7770
7771 if (s)
7772 seq_puts(s,
7773 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
7774 "--- ------- ------------ ---------- -----------\n");
7775
7776 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
7777 offset = i * MME_QMAN_OFFSET;
7778 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
7779 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
7780
7781 /* MME 1 & 3 are slaves, no need to check their QMANs */
7782 is_slave = i % 2;
7783 if (!is_slave) {
7784 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
7785 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
7786 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7787 }
7788
7789 is_idle &= is_eng_idle;
7790
7791 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03007792 *mask |= ((u64) !is_eng_idle) <<
7793 (GAUDI_ENGINE_ID_MME_0 + i);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007794 if (s) {
7795 if (!is_slave)
7796 seq_printf(s, fmt, i,
7797 is_eng_idle ? "Y" : "N",
7798 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
7799 else
7800 seq_printf(s, mme_slave_fmt, i,
7801 is_eng_idle ? "Y" : "N", "-",
7802 "-", mme_arch_sts);
7803 }
7804 }
7805
7806 if (s)
Oded Gabbay3c681572020-11-02 21:10:39 +02007807 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
7808 "--- ------- ------------ ----------\n");
7809
7810 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
7811 offset = i * NIC_MACRO_QMAN_OFFSET;
7812 port = 2 * i;
7813 if (hdev->nic_ports_mask & BIT(port)) {
7814 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7815 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7816 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7817 is_idle &= is_eng_idle;
7818
7819 if (mask)
7820 *mask |= ((u64) !is_eng_idle) <<
7821 (GAUDI_ENGINE_ID_NIC_0 + port);
7822 if (s)
7823 seq_printf(s, nic_fmt, port,
7824 is_eng_idle ? "Y" : "N",
7825 qm_glbl_sts0, qm_cgm_sts);
7826 }
7827
7828 port = 2 * i + 1;
7829 if (hdev->nic_ports_mask & BIT(port)) {
7830 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
7831 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
7832 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7833 is_idle &= is_eng_idle;
7834
7835 if (mask)
7836 *mask |= ((u64) !is_eng_idle) <<
7837 (GAUDI_ENGINE_ID_NIC_0 + port);
7838 if (s)
7839 seq_printf(s, nic_fmt, port,
7840 is_eng_idle ? "Y" : "N",
7841 qm_glbl_sts0, qm_cgm_sts);
7842 }
7843 }
7844
7845 if (s)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007846 seq_puts(s, "\n");
7847
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007848 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007849
7850 mutex_unlock(&gaudi->clk_gate_mutex);
7851
7852 return is_idle;
7853}
7854
7855static void gaudi_hw_queues_lock(struct hl_device *hdev)
7856 __acquires(&gaudi->hw_queues_lock)
7857{
7858 struct gaudi_device *gaudi = hdev->asic_specific;
7859
7860 spin_lock(&gaudi->hw_queues_lock);
7861}
7862
7863static void gaudi_hw_queues_unlock(struct hl_device *hdev)
7864 __releases(&gaudi->hw_queues_lock)
7865{
7866 struct gaudi_device *gaudi = hdev->asic_specific;
7867
7868 spin_unlock(&gaudi->hw_queues_lock);
7869}
7870
7871static u32 gaudi_get_pci_id(struct hl_device *hdev)
7872{
7873 return hdev->pdev->device;
7874}
7875
7876static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
7877 size_t max_size)
7878{
7879 struct gaudi_device *gaudi = hdev->asic_specific;
7880
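 /*
 * The EEPROM is read via the CPU-CP firmware; if the CPU queue never came
 * up there is no one to ask, so simply return 0.
 */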
7881 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7882 return 0;
7883
7884 return hl_fw_get_eeprom_data(hdev, data, max_size);
7885}
7886
7887/*
7888 * This function should be used only during initialization and/or after reset,
7889 * when there are no active users.
7890 */
7891static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
7892 u32 tpc_id)
7893{
7894 struct gaudi_device *gaudi = hdev->asic_specific;
7895 u64 kernel_timeout;
7896 u32 status, offset;
7897 int rc;
7898
7899 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
7900
7901 if (hdev->pldm)
7902 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
7903 else
7904 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
7905
7906 mutex_lock(&gaudi->clk_gate_mutex);
7907
7908 hdev->asic_funcs->disable_clock_gating(hdev);
7909
7910 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
7911 lower_32_bits(tpc_kernel));
7912 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
7913 upper_32_bits(tpc_kernel));
7914
7915 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
7916 lower_32_bits(tpc_kernel));
7917 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
7918 upper_32_bits(tpc_kernel));
7919 /* set a valid LUT pointer, content is of no significance */
7920 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
7921 lower_32_bits(tpc_kernel));
7922 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
7923 upper_32_bits(tpc_kernel));
7924
7925 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
7926 lower_32_bits(CFG_BASE +
7927 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
7928
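 /* Invalidate the TPC icache and prefetch 64KB of the kernel image */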
7929 WREG32(mmTPC0_CFG_TPC_CMD + offset,
7930 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
7931 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
7932 /* wait a bit for the engine to start executing */
7933 usleep_range(1000, 1500);
7934
7935 /* wait until engine has finished executing */
7936 rc = hl_poll_timeout(
7937 hdev,
7938 mmTPC0_CFG_STATUS + offset,
7939 status,
7940 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7941 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7942 1000,
7943 kernel_timeout);
7944
7945 if (rc) {
7946 dev_err(hdev->dev,
7947 "Timeout while waiting for TPC%d icache prefetch\n",
7948 tpc_id);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007949 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007950 mutex_unlock(&gaudi->clk_gate_mutex);
7951 return -EIO;
7952 }
7953
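 /* Prefetch is done; now actually start executing the TPC kernel */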
7954 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
7955 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
7956
7957 /* wait a bit for the engine to start executing */
7958 usleep_range(1000, 1500);
7959
7960 /* wait until engine has finished executing */
7961 rc = hl_poll_timeout(
7962 hdev,
7963 mmTPC0_CFG_STATUS + offset,
7964 status,
7965 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7966 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7967 1000,
7968 kernel_timeout);
7969
Oded Gabbay31ac1f12020-08-12 11:28:13 +03007970 if (rc) {
7971 dev_err(hdev->dev,
7972 "Timeout while waiting for TPC%d vector pipe\n",
7973 tpc_id);
7974 hdev->asic_funcs->set_clock_gating(hdev);
7975 mutex_unlock(&gaudi->clk_gate_mutex);
7976 return -EIO;
7977 }
7978
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007979 rc = hl_poll_timeout(
7980 hdev,
7981 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
7982 status,
7983 (status == 0),
7984 1000,
7985 kernel_timeout);
7986
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007987 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007988 mutex_unlock(&gaudi->clk_gate_mutex);
7989
7990 if (rc) {
7991 dev_err(hdev->dev,
7992 "Timeout while waiting for TPC%d kernel to execute\n",
7993 tpc_id);
7994 return -EIO;
7995 }
7996
7997 return 0;
7998}
7999
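/*
 * The internal CB pool is a host-resident buffer that the driver slices into
 * small command buffers for its own submissions (e.g. the collective wait
 * flow). The init below allocates the coherent buffer, wraps it in a
 * gen_pool, reserves a host VA block in the context and maps the buffer
 * there so the QMANs can fetch the CBs through the MMU.
 */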
Ofir Bitton5de406c2020-09-10 10:56:26 +03008000static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8001 struct hl_ctx *ctx)
8002{
8003 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bitton5de406c2020-09-10 10:56:26 +03008004 int min_alloc_order, rc, collective_cb_size;
8005
8006 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8007 return 0;
8008
8009 hdev->internal_cb_pool_virt_addr =
8010 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8011 HOST_SPACE_INTERNAL_CB_SZ,
8012 &hdev->internal_cb_pool_dma_addr,
8013 GFP_KERNEL | __GFP_ZERO);
8014
8015 if (!hdev->internal_cb_pool_virt_addr)
8016 return -ENOMEM;
8017
8018 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8019 sizeof(struct packet_fence);
8020 min_alloc_order = ilog2(collective_cb_size);
8021
8022 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8023 if (!hdev->internal_cb_pool) {
8024 dev_err(hdev->dev,
8025 "Failed to create internal CB pool\n");
8026 rc = -ENOMEM;
8027 goto free_internal_cb_pool;
8028 }
8029
8030 rc = gen_pool_add(hdev->internal_cb_pool,
8031 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8032 HOST_SPACE_INTERNAL_CB_SZ, -1);
8033 if (rc) {
8034 dev_err(hdev->dev,
8035 "Failed to add memory to internal CB pool\n");
8036 rc = -EFAULT;
8037 goto destroy_internal_cb_pool;
8038 }
8039
Ofir Bittonbe91b912020-10-22 15:04:10 +03008040 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
Ofir Bitton412c41f2020-11-04 15:18:55 +02008041 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8042 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
Ofir Bittonbe91b912020-10-22 15:04:10 +03008043
8044 if (!hdev->internal_cb_va_base) {
 rc = -ENOMEM;
8045 goto destroy_internal_cb_pool;
 }
Ofir Bitton5de406c2020-09-10 10:56:26 +03008046
8047 mutex_lock(&ctx->mmu_lock);
Ofir Bitton5c054872020-10-22 15:13:10 +03008048 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8049 hdev->internal_cb_pool_dma_addr,
8050 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008051
8052 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008053 mutex_unlock(&ctx->mmu_lock);
8054
Ofir Bitton5c054872020-10-22 15:13:10 +03008055 if (rc)
8056 goto unreserve_internal_cb_pool;
8057
Ofir Bitton5de406c2020-09-10 10:56:26 +03008058 return 0;
8059
Ofir Bitton5c054872020-10-22 15:13:10 +03008060unreserve_internal_cb_pool:
Ofir Bittonbe91b912020-10-22 15:04:10 +03008061 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8062 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008063destroy_internal_cb_pool:
8064 gen_pool_destroy(hdev->internal_cb_pool);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008065free_internal_cb_pool:
8066 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8067 HOST_SPACE_INTERNAL_CB_SZ,
8068 hdev->internal_cb_pool_virt_addr,
8069 hdev->internal_cb_pool_dma_addr);
8070
8071 return rc;
8072}
8073
8074static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8075 struct hl_ctx *ctx)
8076{
8077 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bitton5de406c2020-09-10 10:56:26 +03008078
8079 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8080 return;
8081
8082 mutex_lock(&ctx->mmu_lock);
Ofir Bitton5c054872020-10-22 15:13:10 +03008083 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8084 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bittonbe91b912020-10-22 15:04:10 +03008085 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8086 HOST_SPACE_INTERNAL_CB_SZ);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008087 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008088 mutex_unlock(&ctx->mmu_lock);
8089
8090 gen_pool_destroy(hdev->internal_cb_pool);
8091
8092 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8093 HOST_SPACE_INTERNAL_CB_SZ,
8094 hdev->internal_cb_pool_virt_addr,
8095 hdev->internal_cb_pool_dma_addr);
8096}
8097
kernel test robotbb34bf72020-07-29 08:03:13 +08008098static int gaudi_ctx_init(struct hl_ctx *ctx)
Ofir Bittona04b7cd2020-07-13 13:36:55 +03008099{
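 /*
 * Nothing to do for the kernel context; MMU ASID preparation and the
 * internal CB pool are only set up for user contexts.
 */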
Ofir Bitton8e39e752020-11-12 11:03:32 +02008100 if (ctx->asid == HL_KERNEL_ASID_ID)
8101 return 0;
8102
Ofir Bitton20b75252020-09-30 15:51:10 +03008103 gaudi_mmu_prepare(ctx->hdev, ctx->asid);
Ofir Bitton5de406c2020-09-10 10:56:26 +03008104 return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8105}
Ofir Bitton20b75252020-09-30 15:51:10 +03008106
kernel test robot293744d2020-11-19 12:25:43 +08008107static void gaudi_ctx_fini(struct hl_ctx *ctx)
Ofir Bitton5de406c2020-09-10 10:56:26 +03008108{
Ofir Bitton8e39e752020-11-12 11:03:32 +02008109 if (ctx->asid == HL_KERNEL_ASID_ID)
Ofir Bitton5de406c2020-09-10 10:56:26 +03008110 return;
8111
8112 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
Ofir Bittona04b7cd2020-07-13 13:36:55 +03008113}
8114
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008115static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8116{
8117 return gaudi_cq_assignment[cq_idx];
8118}
8119
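/*
 * Sizes of the driver-generated signal/wait CBs: the signal CB is a single
 * MSG_SHORT that bumps the SOB; the wait CB is four MSG_SHORTs (three that
 * configure the monitor plus one that arms it) followed by a FENCE. Both
 * leave room for the two MSG_PROT completion packets appended at submission
 * time (see gaudi_add_end_of_cb_packets).
 */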
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008120static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8121{
8122 return sizeof(struct packet_msg_short) +
8123 sizeof(struct packet_msg_prot) * 2;
8124}
8125
8126static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8127{
8128 return sizeof(struct packet_msg_short) * 4 +
8129 sizeof(struct packet_fence) +
8130 sizeof(struct packet_msg_prot) * 2;
8131}
8132
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008133static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
Alon Mizrahi72ab9ca52020-12-02 19:55:30 +02008134 u32 size, bool eb)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008135{
8136 struct hl_cb *cb = (struct hl_cb *) data;
8137 struct packet_msg_short *pkt;
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008138 u32 value, ctl, pkt_size = sizeof(*pkt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008139
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008140 pkt = cb->kernel_address + size;
8141 memset(pkt, 0, pkt_size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008142
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008143 /* Inc by 1, Mode ADD */
8144 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8145 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008146
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008147 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8148 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8149 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008150 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8151 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8152 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8153 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008154
8155 pkt->value = cpu_to_le32(value);
8156 pkt->ctl = cpu_to_le32(ctl);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008157
8158 return size + pkt_size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008159}
8160
8161static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8162 u16 addr)
8163{
8164 u32 ctl, pkt_size = sizeof(*pkt);
8165
8166 memset(pkt, 0, pkt_size);
8167
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008168 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8169 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008170 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8171 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8172 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8173 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* only the last pkt gets the MB */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008174
8175 pkt->value = cpu_to_le32(value);
8176 pkt->ctl = cpu_to_le32(ctl);
8177
8178 return pkt_size;
8179}
8180
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008181static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8182 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8183 u16 sob_val, u16 mon_id)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008184{
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008185 u64 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008186 u32 ctl, value, pkt_size = sizeof(*pkt);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008187 u16 msg_addr_offset;
8188 u8 mask;
8189
8190 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8191 dev_err(hdev->dev,
8192 "sob_base %u (mask %#x) is not valid\n",
8193 sob_base, sob_mask);
8194 return 0;
8195 }
8196
8197 /*
8198 * monitor_base should be the content of the base0 address registers,
8199 * so it will be added to the msg short offsets
8200 */
8201 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8202
8203 msg_addr_offset =
8204 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8205 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008206
8207 memset(pkt, 0, pkt_size);
8208
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008209 /* Monitor config packet: bind the monitor to a sync object */
8210 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008211 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8212 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8213 0); /* GREATER OR EQUAL */
8214 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008215
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008216 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008217 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8218 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008219 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8220 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8221 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8222 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008223
8224 pkt->value = cpu_to_le32(value);
8225 pkt->ctl = cpu_to_le32(ctl);
8226
8227 return pkt_size;
8228}
8229
8230static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8231{
8232 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8233
8234 memset(pkt, 0, pkt_size);
8235
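 /*
 * Fence counter 2 is the one the monitor payload increments (its RDATA
 * register is the address picked by gaudi_get_fence_addr); block until it
 * reaches 1 and then decrement it back.
 */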
Ofir Bitton6c07bab2020-06-01 10:38:46 +03008236 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8237 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8238 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008239
Ofir Bittonf8b0f2e2020-12-06 10:22:32 +02008240 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8241 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8242 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8243 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008244
8245 pkt->cfg = cpu_to_le32(cfg);
8246 pkt->ctl = cpu_to_le32(ctl);
8247
8248 return pkt_size;
8249}
8250
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008251static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008252{
Ofir Bitton5de406c2020-09-10 10:56:26 +03008253 u32 offset, nic_index;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008254
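 /*
 * Translate the queue ID into the CP_FENCE2_RDATA register of that queue's
 * QMAN; this is the address the monitor payload writes to in order to
 * release the FENCE packet of the wait CB.
 */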
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008255 switch (queue_id) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008256 case GAUDI_QUEUE_ID_DMA_0_0:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008257 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008258 break;
8259 case GAUDI_QUEUE_ID_DMA_0_1:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008260 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008261 break;
8262 case GAUDI_QUEUE_ID_DMA_0_2:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008263 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008264 break;
8265 case GAUDI_QUEUE_ID_DMA_0_3:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008266 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008267 break;
8268 case GAUDI_QUEUE_ID_DMA_1_0:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008269 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008270 break;
8271 case GAUDI_QUEUE_ID_DMA_1_1:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008272 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008273 break;
8274 case GAUDI_QUEUE_ID_DMA_1_2:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008275 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008276 break;
8277 case GAUDI_QUEUE_ID_DMA_1_3:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008278 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008279 break;
8280 case GAUDI_QUEUE_ID_DMA_5_0:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008281 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008282 break;
8283 case GAUDI_QUEUE_ID_DMA_5_1:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008284 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008285 break;
8286 case GAUDI_QUEUE_ID_DMA_5_2:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008287 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008288 break;
8289 case GAUDI_QUEUE_ID_DMA_5_3:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008290 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008291 break;
Ofir Bitton5de406c2020-09-10 10:56:26 +03008292 case GAUDI_QUEUE_ID_TPC_7_0:
8293 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8294 break;
8295 case GAUDI_QUEUE_ID_TPC_7_1:
8296 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8297 break;
8298 case GAUDI_QUEUE_ID_TPC_7_2:
8299 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8300 break;
8301 case GAUDI_QUEUE_ID_TPC_7_3:
8302 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8303 break;
8304 case GAUDI_QUEUE_ID_NIC_0_0:
8305 case GAUDI_QUEUE_ID_NIC_1_0:
8306 case GAUDI_QUEUE_ID_NIC_2_0:
8307 case GAUDI_QUEUE_ID_NIC_3_0:
8308 case GAUDI_QUEUE_ID_NIC_4_0:
8309 case GAUDI_QUEUE_ID_NIC_5_0:
8310 case GAUDI_QUEUE_ID_NIC_6_0:
8311 case GAUDI_QUEUE_ID_NIC_7_0:
8312 case GAUDI_QUEUE_ID_NIC_8_0:
8313 case GAUDI_QUEUE_ID_NIC_9_0:
8314 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8315 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8316 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8317 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8318 break;
8319 case GAUDI_QUEUE_ID_NIC_0_1:
8320 case GAUDI_QUEUE_ID_NIC_1_1:
8321 case GAUDI_QUEUE_ID_NIC_2_1:
8322 case GAUDI_QUEUE_ID_NIC_3_1:
8323 case GAUDI_QUEUE_ID_NIC_4_1:
8324 case GAUDI_QUEUE_ID_NIC_5_1:
8325 case GAUDI_QUEUE_ID_NIC_6_1:
8326 case GAUDI_QUEUE_ID_NIC_7_1:
8327 case GAUDI_QUEUE_ID_NIC_8_1:
8328 case GAUDI_QUEUE_ID_NIC_9_1:
8329 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8330 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8331 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8332 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8333 break;
8334 case GAUDI_QUEUE_ID_NIC_0_2:
8335 case GAUDI_QUEUE_ID_NIC_1_2:
8336 case GAUDI_QUEUE_ID_NIC_2_2:
8337 case GAUDI_QUEUE_ID_NIC_3_2:
8338 case GAUDI_QUEUE_ID_NIC_4_2:
8339 case GAUDI_QUEUE_ID_NIC_5_2:
8340 case GAUDI_QUEUE_ID_NIC_6_2:
8341 case GAUDI_QUEUE_ID_NIC_7_2:
8342 case GAUDI_QUEUE_ID_NIC_8_2:
8343 case GAUDI_QUEUE_ID_NIC_9_2:
8344 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8345 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8346 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8347 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8348 break;
8349 case GAUDI_QUEUE_ID_NIC_0_3:
8350 case GAUDI_QUEUE_ID_NIC_1_3:
8351 case GAUDI_QUEUE_ID_NIC_2_3:
8352 case GAUDI_QUEUE_ID_NIC_3_3:
8353 case GAUDI_QUEUE_ID_NIC_4_3:
8354 case GAUDI_QUEUE_ID_NIC_5_3:
8355 case GAUDI_QUEUE_ID_NIC_6_3:
8356 case GAUDI_QUEUE_ID_NIC_7_3:
8357 case GAUDI_QUEUE_ID_NIC_8_3:
8358 case GAUDI_QUEUE_ID_NIC_9_3:
8359 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8360 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8361 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8362 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8363 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008364 default:
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008365 return -EINVAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008366 }
8367
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008368 *addr = CFG_BASE + offset;
8369
8370 return 0;
8371}
8372
8373static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8374{
8375 u64 monitor_base;
8376 u32 size = 0;
8377 u16 msg_addr_offset;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008378
8379 /*
8380 * monitor_base should be the content of the base0 address registers,
8381 * so it will be added to the msg short offsets
8382 */
8383 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8384
8385 /* First monitor config packet: low address of the sync */
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008386 msg_addr_offset =
8387 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8388 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008389
8390 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8391 msg_addr_offset);
8392
8393 /* Second monitor config packet: high address of the sync */
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008394 msg_addr_offset =
8395 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8396 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008397
8398 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8399 msg_addr_offset);
8400
8401 /*
8402 * Third monitor config packet: the payload, i.e. what to write when the
8403 * sync triggers
8404 */
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008405 msg_addr_offset =
8406 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8407 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008408
8409 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8410
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008411 return size;
8412}
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008413
Oded Gabbay3c681572020-11-02 21:10:39 +02008414static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8415 struct hl_gen_wait_properties *prop)
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008416{
8417 struct hl_cb *cb = (struct hl_cb *) prop->data;
8418 void *buf = cb->kernel_address;
8419 u64 fence_addr = 0;
8420 u32 size = prop->size;
8421
8422 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8423 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8424 prop->q_idx);
8425 return 0;
8426 }
8427
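 /*
 * Wait CB layout: three MSG_SHORTs that program the monitor (fence address
 * low/high and the payload value), one MSG_SHORT that arms it on the SOB
 * range, and a FENCE that stalls the queue until the payload lands.
 */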
8428 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8429 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8430 prop->sob_mask, prop->sob_val, prop->mon_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008431 size += gaudi_add_fence_pkt(buf + size);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03008432
8433 return size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008434}
8435
8436static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8437{
8438 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
Ofir Bitton423815b2021-01-05 09:04:07 +02008439 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008440
8441 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8442 hw_sob->sob_id);
8443
Ofir Bitton423815b2021-01-05 09:04:07 +02008444 rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
8445 CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8446 hw_sob->sob_id * 4, 1, 0);
8447 if (rc)
8448 dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008449
8450 kref_init(&hw_sob->kref);
8451}
8452
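/*
 * A magic value in a sticky (non-reset) scratchpad register, written by the
 * firmware (hence the _from_fw suffix), marks POWER9 hosts that can use a
 * full 64-bit DMA mask; otherwise the driver falls back to 48 bits.
 */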
8453static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
8454{
8455 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
8456 HL_POWER9_HOST_MAGIC) {
8457 hdev->power9_64bit_dma_enable = 1;
8458 hdev->dma_mask = 64;
8459 } else {
8460 hdev->power9_64bit_dma_enable = 0;
8461 hdev->dma_mask = 48;
8462 }
8463}
8464
8465static u64 gaudi_get_device_time(struct hl_device *hdev)
8466{
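 /*
 * Compose the 64-bit device timestamp from its upper and lower 32-bit
 * halves (upper half sampled first).
 */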
8467 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8468
8469 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8470}
8471
8472static const struct hl_asic_funcs gaudi_funcs = {
8473 .early_init = gaudi_early_init,
8474 .early_fini = gaudi_early_fini,
8475 .late_init = gaudi_late_init,
8476 .late_fini = gaudi_late_fini,
8477 .sw_init = gaudi_sw_init,
8478 .sw_fini = gaudi_sw_fini,
8479 .hw_init = gaudi_hw_init,
8480 .hw_fini = gaudi_hw_fini,
8481 .halt_engines = gaudi_halt_engines,
8482 .suspend = gaudi_suspend,
8483 .resume = gaudi_resume,
8484 .cb_mmap = gaudi_cb_mmap,
8485 .ring_doorbell = gaudi_ring_doorbell,
8486 .pqe_write = gaudi_pqe_write,
8487 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
8488 .asic_dma_free_coherent = gaudi_dma_free_coherent,
farah kassabri03df1362020-05-06 11:17:38 +03008489 .scrub_device_mem = gaudi_scrub_device_mem,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008490 .get_int_queue_base = gaudi_get_int_queue_base,
8491 .test_queues = gaudi_test_queues,
8492 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
8493 .asic_dma_pool_free = gaudi_dma_pool_free,
8494 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
8495 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
8496 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
8497 .cs_parser = gaudi_cs_parser,
8498 .asic_dma_map_sg = gaudi_dma_map_sg,
8499 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
8500 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
8501 .update_eq_ci = gaudi_update_eq_ci,
8502 .context_switch = gaudi_context_switch,
8503 .restore_phase_topology = gaudi_restore_phase_topology,
8504 .debugfs_read32 = gaudi_debugfs_read32,
8505 .debugfs_write32 = gaudi_debugfs_write32,
8506 .debugfs_read64 = gaudi_debugfs_read64,
8507 .debugfs_write64 = gaudi_debugfs_write64,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03008508 .add_device_attr = gaudi_add_device_attr,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008509 .handle_eqe = gaudi_handle_eqe,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03008510 .set_pll_profile = gaudi_set_pll_profile,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008511 .get_events_stat = gaudi_get_events_stat,
8512 .read_pte = gaudi_read_pte,
8513 .write_pte = gaudi_write_pte,
8514 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
8515 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
8516 .send_heartbeat = gaudi_send_heartbeat,
Oded Gabbaye38bfd32020-07-03 20:46:12 +03008517 .set_clock_gating = gaudi_set_clock_gating,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008518 .disable_clock_gating = gaudi_disable_clock_gating,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03008519 .debug_coresight = gaudi_debug_coresight,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008520 .is_device_idle = gaudi_is_device_idle,
8521 .soft_reset_late_init = gaudi_soft_reset_late_init,
8522 .hw_queues_lock = gaudi_hw_queues_lock,
8523 .hw_queues_unlock = gaudi_hw_queues_unlock,
8524 .get_pci_id = gaudi_get_pci_id,
8525 .get_eeprom_data = gaudi_get_eeprom_data,
8526 .send_cpu_message = gaudi_send_cpu_message,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008527 .pci_bars_map = gaudi_pci_bars_map,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008528 .init_iatu = gaudi_init_iatu,
8529 .rreg = hl_rreg,
8530 .wreg = hl_wreg,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03008531 .halt_coresight = gaudi_halt_coresight,
Ofir Bittona04b7cd2020-07-13 13:36:55 +03008532 .ctx_init = gaudi_ctx_init,
Ofir Bitton5de406c2020-09-10 10:56:26 +03008533 .ctx_fini = gaudi_ctx_fini,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03008534 .get_clk_rate = gaudi_get_clk_rate,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008535 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
8536 .read_device_fw_version = gaudi_read_device_fw_version,
8537 .load_firmware_to_device = gaudi_load_firmware_to_device,
8538 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008539 .get_signal_cb_size = gaudi_get_signal_cb_size,
8540 .get_wait_cb_size = gaudi_get_wait_cb_size,
8541 .gen_signal_cb = gaudi_gen_signal_cb,
8542 .gen_wait_cb = gaudi_gen_wait_cb,
8543 .reset_sob = gaudi_reset_sob,
Ofir Bitton5fe1c172020-09-10 10:10:55 +03008544 .reset_sob_group = gaudi_reset_sob_group,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008545 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
Ofir Bitton5fe1c172020-09-10 10:10:55 +03008546 .get_device_time = gaudi_get_device_time,
8547 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
Moti Haimovskib19dc672020-11-18 20:15:29 +02008548 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
Ofir Bittond2b980f2021-01-07 12:14:17 +02008549 .scramble_vaddr = hl_mmu_scramble_vaddr,
8550 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008551};
8552
8553/**
8554 * gaudi_set_asic_funcs - set GAUDI function pointers
8555 *
Lee Jonesf7d227c2020-07-01 09:58:42 +01008556 * @hdev: pointer to hl_device structure
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03008557 *
8558 */
8559void gaudi_set_asic_funcs(struct hl_device *hdev)
8560{
8561 hdev->asic_funcs = &gaudi_funcs;
8562}