// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/genalloc.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: the driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	1000		/* 1000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

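/*
 * validate_packet_id() - check that a packet opcode parsed from a user CB is
 * one of the packet types the driver knows how to handle.
 */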
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

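/*
 * get_collective_mode() - return the collective role (master/slave) of a
 * hardware queue, based on its queue id and queue type.
 */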
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->max_power_default = MAX_POWER_DEFAULT_PCI;

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	return 0;
}

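/*
 * gaudi_pci_bars_map() - map the PCI BARs (SRAM/CFG uncached, HBM as
 * write-combined) and set the register base used for CFG space access.
 */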
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

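/*
 * gaudi_set_hbm_bar_base() - move the HBM BAR window so it points at @addr.
 * Returns the previous window address, or U64_MAX on failure.
 */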
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

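/*
 * gaudi_init_iatu() - configure the PCIe iATU inbound regions (SRAM + CFG,
 * SPI flash, HBM) and the outbound region that points to host memory.
 */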
562static int gaudi_init_iatu(struct hl_device *hdev)
563{
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300564 struct hl_inbound_pci_region inbound_region;
565 struct hl_outbound_pci_region outbound_region;
566 int rc;
567
568 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
569 inbound_region.mode = PCI_BAR_MATCH_MODE;
570 inbound_region.bar = SRAM_BAR_ID;
571 inbound_region.addr = SRAM_BASE_ADDR;
572 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
573 if (rc)
574 goto done;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300575
576 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300577 inbound_region.mode = PCI_BAR_MATCH_MODE;
578 inbound_region.bar = CFG_BAR_ID;
579 inbound_region.addr = SPI_FLASH_BASE_ADDR;
580 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300581 if (rc)
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300582 goto done;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300583
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300584 /* Inbound Region 2 - Bar 4 - Point to HBM */
585 inbound_region.mode = PCI_BAR_MATCH_MODE;
586 inbound_region.bar = HBM_BAR_ID;
587 inbound_region.addr = DRAM_PHYS_BASE;
588 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
589 if (rc)
590 goto done;
591
592 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
593
594 /* Outbound Region 0 - Point to Host */
595 outbound_region.addr = HOST_PHYS_BASE;
596 outbound_region.size = HOST_PHYS_SIZE;
597 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
598
599done:
600 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300601}
602
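/*
 * gaudi_early_init() - fetch the fixed ASIC properties, sanity-check the PCI
 * BAR sizes and initialize PCI before any other hardware access is done.
 */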
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = gaudi_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc)
		goto free_queue_props;

	/* GAUDI Firmware does not yet support security */
	prop->fw_security_disabled = true;
	dev_info(hdev->dev, "firmware-level security is disabled\n");

	return 0;

free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 trace_freq = 0;
	u32 pll_clk = 0;
	u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
	u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
	u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
	u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
	u32 od = RREG32(mmPSOC_CPU_PLL_OD);

	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
		if (div_sel == DIV_SEL_REF_CLK)
			trace_freq = PLL_REF_CLK;
		else
			trace_freq = PLL_REF_CLK / (div_fctr + 1);
	} else if (div_sel == DIV_SEL_PLL_CLK ||
			div_sel == DIV_SEL_DIVIDED_PLL) {
		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
		if (div_sel == DIV_SEL_PLL_CLK)
			trace_freq = pll_clk;
		else
			trace_freq = pll_clk / (div_fctr + 1);
	} else {
		dev_warn(hdev->dev,
			"Received invalid div select value: %d", div_sel);
	}

	prop->psoc_timestamp_frequency = trace_freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}

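/*
 * _gaudi_init_tpc_mem() - build a LIN_DMA packet that copies the TPC kernel
 * binary from host memory to SRAM, submit it on QMAN0 and then run the
 * kernel on every TPC engine.
 */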
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	cb->cs_cnt--;

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc;

	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc) {
		dev_err(hdev->dev, "Firmware file %s is not found!\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

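/*
 * gaudi_collective_map_sobs() - assign a SOB from the stream's current SOB
 * group to every collective slave queue (the NIC queues plus DMA5/TPC7).
 */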
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

859static void gaudi_sob_group_hw_reset(struct kref *ref)
860{
861 struct gaudi_hw_sob_group *hw_sob_group =
862 container_of(ref, struct gaudi_hw_sob_group, kref);
863 struct hl_device *hdev = hw_sob_group->hdev;
864 int i;
865
866 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
867 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
868 (hw_sob_group->base_sob_id + i) * 4, 0);
869
870 kref_init(&hw_sob_group->kref);
871}
872
873static void gaudi_sob_group_reset_error(struct kref *ref)
874{
875 struct gaudi_hw_sob_group *hw_sob_group =
876 container_of(ref, struct gaudi_hw_sob_group, kref);
877 struct hl_device *hdev = hw_sob_group->hdev;
878
879 dev_crit(hdev->dev,
880 "SOB release shouldn't be called here, base_sob_id: %d\n",
881 hw_sob_group->base_sob_id);
882}
883
884static int gaudi_collective_init(struct hl_device *hdev)
885{
886 u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
887 struct gaudi_collective_properties *prop;
888 struct gaudi_device *gaudi;
889
890 gaudi = hdev->asic_specific;
891 prop = &gaudi->collective_props;
892 sob_id = hdev->asic_prop.collective_first_sob;
893
894 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
895 reserved_sobs_per_group =
896 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
897
898 /* Init SOB groups */
899 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
900 prop->hw_sob_group[i].hdev = hdev;
901 prop->hw_sob_group[i].base_sob_id = sob_id;
902 sob_id += reserved_sobs_per_group;
903 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
904 }
905
906 for (i = 0 ; i < QMAN_STREAMS; i++) {
907 prop->next_sob_group_val[i] = 1;
908 prop->curr_sob_group_idx[i] = 0;
909 gaudi_collective_map_sobs(hdev, i);
910 }
911
912 prop->mstr_sob_mask[0] = 0;
913 master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
914 for (i = 0 ; i < master_monitor_sobs ; i++)
915 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
916 prop->mstr_sob_mask[0] |= BIT(i);
917
918 prop->mstr_sob_mask[1] = 0;
919 master_monitor_sobs =
920 NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
921 for (i = 0 ; i < master_monitor_sobs; i++) {
922 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
923 prop->mstr_sob_mask[1] |= BIT(i);
924 }
925
926 /* Set collective engine bit */
927 prop->mstr_sob_mask[1] |= BIT(i);
928
929 return 0;
930}
931
932static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
933{
934 struct gaudi_device *gaudi = hdev->asic_specific;
935 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
936
937 kref_put(&cprop->hw_sob_group[sob_group].kref,
938 gaudi_sob_group_hw_reset);
939}
940
941static void gaudi_collective_master_init_job(struct hl_device *hdev,
942 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
943{
944 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
945 struct gaudi_collective_properties *cprop;
946 struct hl_gen_wait_properties wait_prop;
947 struct hl_sync_stream_properties *prop;
948 struct gaudi_device *gaudi;
949
950 gaudi = hdev->asic_specific;
951 cprop = &gaudi->collective_props;
952 queue_id = job->hw_queue_id;
953 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
954
955 master_sob_base =
956 cprop->hw_sob_group[sob_group_offset].base_sob_id;
957 master_monitor = prop->collective_mstr_mon_id[0];
958
959 dev_dbg(hdev->dev,
960 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
961 master_sob_base, cprop->mstr_sob_mask[0],
962 cprop->next_sob_group_val[stream],
963 master_monitor, queue_id);
964
965 wait_prop.data = (void *) job->patched_cb;
966 wait_prop.sob_base = master_sob_base;
967 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
968 wait_prop.sob_val = cprop->next_sob_group_val[stream];
969 wait_prop.mon_id = master_monitor;
970 wait_prop.q_idx = queue_id;
971 wait_prop.size = cb_size;
972 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
973
974 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
975 master_monitor = prop->collective_mstr_mon_id[1];
976
977 dev_dbg(hdev->dev,
978 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
979 master_sob_base, cprop->mstr_sob_mask[1],
980 cprop->next_sob_group_val[stream],
981 master_monitor, queue_id);
982
983 wait_prop.sob_base = master_sob_base;
984 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
985 wait_prop.mon_id = master_monitor;
986 wait_prop.size = cb_size;
987 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
988}
989
990static void gaudi_collective_slave_init_job(struct hl_device *hdev,
991 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
992{
993 struct hl_gen_wait_properties wait_prop;
994 struct hl_sync_stream_properties *prop;
995 u32 queue_id, cb_size = 0;
996
997 queue_id = job->hw_queue_id;
998 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
999
1000 /* Add to wait CBs using slave monitor */
1001 wait_prop.data = (void *) job->user_cb;
1002 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1003 wait_prop.sob_mask = 0x1;
1004 wait_prop.sob_val = cs_cmpl->sob_val;
1005 wait_prop.mon_id = prop->collective_slave_mon_id;
1006 wait_prop.q_idx = queue_id;
1007 wait_prop.size = cb_size;
1008
1009 dev_dbg(hdev->dev,
1010 "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1011 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1012 prop->collective_slave_mon_id, queue_id);
1013
1014 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1015
1016 dev_dbg(hdev->dev,
1017 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1018 prop->collective_sob_id, queue_id);
1019
1020 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1021 prop->collective_sob_id, cb_size);
Ofir Bitton5fe1c172020-09-10 10:10:55 +03001022}
1023
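/*
 * gaudi_collective_wait_init_cs() - initialize all jobs of a collective wait
 * CS: the master jobs wait on the SOB group, the slave jobs wait on the
 * signal SOB and then signal their collective SOB. Also advances the
 * stream's SOB group value and handles wraparound.
 */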
1024static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1025{
Ofir Bitton5de406c2020-09-10 10:56:26 +03001026 struct hl_cs_compl *signal_cs_cmpl =
1027 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1028 struct hl_cs_compl *cs_cmpl =
1029 container_of(cs->fence, struct hl_cs_compl, base_fence);
1030 struct gaudi_collective_properties *cprop;
1031 u32 stream, queue_id, sob_group_offset;
1032 struct gaudi_device *gaudi;
1033 struct hl_device *hdev;
1034 struct hl_cs_job *job;
1035 struct hl_ctx *ctx;
Ofir Bitton5fe1c172020-09-10 10:10:55 +03001036
Ofir Bitton5de406c2020-09-10 10:56:26 +03001037 ctx = cs->ctx;
1038 hdev = ctx->hdev;
1039 gaudi = hdev->asic_specific;
1040 cprop = &gaudi->collective_props;
1041
1042 /* copy the SOB id and value of the signal CS */
1043 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1044 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1045
1046 /* Calculate the stream from collective master queue (1st job) */
1047 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1048 stream = job->hw_queue_id % 4;
1049 sob_group_offset =
1050 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1051
1052 list_for_each_entry(job, &cs->job_list, cs_node) {
1053 queue_id = job->hw_queue_id;
1054
1055 if (hdev->kernel_queues[queue_id].collective_mode ==
1056 HL_COLLECTIVE_MASTER)
1057 gaudi_collective_master_init_job(hdev, job, stream,
1058 sob_group_offset);
1059 else
1060 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1061 }
1062
1063 cs_cmpl->sob_group = sob_group_offset;
1064
1065 /* Handle sob group kref and wraparound */
1066 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1067 cprop->next_sob_group_val[stream]++;
1068
1069 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1070 /*
1071 * Decrement as we reached the max value.
1072 * The release function won't be called here as we've
1073 * just incremented the refcount.
1074 */
1075 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1076 gaudi_sob_group_reset_error);
1077 cprop->next_sob_group_val[stream] = 1;
1078 /* only two SOBs are currently in use */
1079 cprop->curr_sob_group_idx[stream] =
1080 (cprop->curr_sob_group_idx[stream] + 1) &
1081 (HL_RSVD_SOBS - 1);
1082
1083 gaudi_collective_map_sobs(hdev, stream);
1084
1085 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1086 cprop->curr_sob_group_idx[stream], stream);
1087 }
1088
1089 /* Increment kref since all slave queues are now waiting on it */
1090 kref_get(&cs_cmpl->hw_sob->kref);
1091 /*
1092 * Must put the signal fence after the SOB refcnt increment so
1093 * the SOB refcnt won't turn 0 and reset the SOB before the
1094 * wait CS was submitted.
1095 */
1096 mb();
1097 hl_fence_put(cs->signal_fence);
1098 cs->signal_fence = NULL;
1099}
1100
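/*
 * gaudi_collective_wait_create_job() - allocate a job and a kernel-mapped CB
 * sized for either the collective master or a collective slave queue, and
 * attach it to the given CS.
 */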
1101static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1102 struct hl_ctx *ctx, struct hl_cs *cs,
1103 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1104{
1105 struct hw_queue_properties *hw_queue_prop;
1106 struct hl_cs_counters_atomic *cntr;
1107 struct hl_cs_job *job;
1108 struct hl_cb *cb;
1109 u32 cb_size;
1110 bool patched_cb;
1111
1112 cntr = &hdev->aggregated_cs_counters;
1113
1114 if (mode == HL_COLLECTIVE_MASTER) {
1115 /* CB size of collective master queue contains
1116 * 4 msg short packets for monitor 1 configuration
1117 * 1 fence packet
1118 * 4 msg short packets for monitor 2 configuration
1119 * 1 fence packet
1120 * 2 msg prot packets for completion and MSI-X
1121 */
1122 cb_size = sizeof(struct packet_msg_short) * 8 +
1123 sizeof(struct packet_fence) * 2 +
1124 sizeof(struct packet_msg_prot) * 2;
1125 patched_cb = true;
1126 } else {
1127 /* CB size of collective slave queues contains
1128 * 4 msg short packets for monitor configuration
1129 * 1 fence packet
1130 * 1 additional msg short packet for sob signal
1131 */
1132 cb_size = sizeof(struct packet_msg_short) * 5 +
1133 sizeof(struct packet_fence);
1134 patched_cb = false;
1135 }
1136
1137 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1138 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1139 if (!job) {
1140 ctx->cs_counters.out_of_mem_drop_cnt++;
1141 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1142 dev_err(hdev->dev, "Failed to allocate a new job\n");
1143 return -ENOMEM;
1144 }
1145
1146 /* Allocate internal mapped CB for non patched CBs */
1147 cb = hl_cb_kernel_create(hdev, cb_size,
1148 hdev->mmu_enable && !patched_cb);
1149 if (!cb) {
1150 ctx->cs_counters.out_of_mem_drop_cnt++;
1151 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1152 kfree(job);
1153 return -EFAULT;
1154 }
1155
1156 job->id = 0;
1157 job->cs = cs;
1158 job->user_cb = cb;
1159 job->user_cb->cs_cnt++;
1160 job->user_cb_size = cb_size;
1161 job->hw_queue_id = queue_id;
1162
1163 /*
1164 * No need in parsing, user CB is the patched CB.
1165 * We call hl_cb_destroy() out of two reasons - we don't need
1166 * the CB in the CB idr anymore and to decrement its refcount as
1167 * it was incremented inside hl_cb_kernel_create().
1168 */
1169 if (patched_cb)
1170 job->patched_cb = job->user_cb;
1171 else
1172 job->patched_cb = NULL;
1173
1174 job->job_cb_size = job->user_cb_size;
1175 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1176
1177 /* increment refcount as for external queues we get completion */
1178 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1179 cs_get(cs);
1180
1181 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1182
1183 list_add_tail(&job->cs_node, &cs->job_list);
1184
1185 hl_debugfs_add_job(hdev, job);
1186
1187 return 0;
Ofir Bitton5fe1c172020-09-10 10:10:55 +03001188}
1189
1190static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1191 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1192 u32 collective_engine_id)
1193{
Ofir Bitton5de406c2020-09-10 10:56:26 +03001194 struct gaudi_device *gaudi = hdev->asic_specific;
1195 struct hw_queue_properties *hw_queue_prop;
1196 u32 queue_id, collective_queue, num_jobs;
1197 u32 stream, nic_queue, nic_idx = 0;
1198 bool skip;
1199 int i, rc;
1200
1201 /* Verify wait queue id is configured as master */
1202 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1203 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1204 dev_err(hdev->dev,
1205 "Queue %d is not configured as collective master\n",
1206 wait_queue_id);
1207 return -EINVAL;
1208 }
1209
1210 /* Verify engine id is supported */
1211 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1212 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1213 dev_err(hdev->dev,
1214 "Collective wait does not support engine %u\n",
1215 collective_engine_id);
1216 return -EINVAL;
1217 }
1218
1219 stream = wait_queue_id % 4;
1220
1221 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1222 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
Ofir Bitton5de406c2020-09-10 10:56:26 +03001223 else
Ofir Bitton71a984f2020-10-19 16:52:00 +03001224 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
Ofir Bitton5de406c2020-09-10 10:56:26 +03001225
1226 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1227 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1228
1229 /* First job goes to the collective master queue, it will wait for
1230 * the collective slave queues to finish execution.
1231 * The synchronization is done using two monitors:
1232 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1233 * reduction engine (DMA5/TPC7).
1234 *
1235 * Rest of the jobs goes to the collective slave queues which will
1236 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1237 */
1238 for (i = 0 ; i < num_jobs ; i++) {
1239 if (i == 0) {
1240 queue_id = wait_queue_id;
1241 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1242 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1243 } else {
1244 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1245 if (gaudi->hw_cap_initialized &
1246 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1247 skip = false;
1248 else
1249 skip = true;
1250
1251 queue_id = nic_queue;
1252 nic_queue += 4;
1253 nic_idx++;
1254
1255 if (skip)
1256 continue;
1257 } else {
1258 queue_id = collective_queue;
1259 }
1260
1261 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1262 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1263 }
1264
1265 if (rc)
1266 return rc;
1267 }
1268
1269 return rc;
Ofir Bitton5fe1c172020-09-10 10:10:55 +03001270}
1271
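/*
 * gaudi_late_init() - late initialization that requires the device CPU:
 * fetch CPUCP info, enable PCI access from the CPU, clear the MMU page
 * tables, initialize the TPC memories and the collective resources.
 */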
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);

	gaudi_fetch_psoc_frequency(hdev);

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

1340static void gaudi_late_fini(struct hl_device *hdev)
1341{
1342 const struct hwmon_channel_info **channel_info_arr;
1343 int i = 0;
1344
1345 if (!hdev->hl_chip_info->info)
1346 return;
1347
1348 channel_info_arr = hdev->hl_chip_info->info;
1349
1350 while (channel_info_arr[i]) {
1351 kfree(channel_info_arr[i]->config);
1352 kfree(channel_info_arr[i]);
1353 i++;
1354 }
1355
1356 kfree(channel_info_arr);
1357
1358 hdev->hl_chip_info->info = NULL;
1359}
1360
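/*
 * gaudi_alloc_cpu_accessible_dma_mem() - allocate the CPU-accessible DMA
 * region, retrying until an allocation whose MSBs (bits 49:39) are identical
 * across the whole range is found, as required by the device CPU.
 */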
1361static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1362{
1363 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1364 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1365 int i, j, rc = 0;
1366
1367 /*
1368 * The device CPU works with 40-bits addresses, while bit 39 must be set
1369 * to '1' when accessing the host.
1370 * Bits 49:39 of the full host address are saved for a later
1371 * configuration of the HW to perform extension to 50 bits.
1372 * Because there is a single HW register that holds the extension bits,
1373 * these bits must be identical in all allocated range.
1374 */
1375
1376 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1377 virt_addr_arr[i] =
1378 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1379 HL_CPU_ACCESSIBLE_MEM_SIZE,
1380 &dma_addr_arr[i],
1381 GFP_KERNEL | __GFP_ZERO);
1382 if (!virt_addr_arr[i]) {
1383 rc = -ENOMEM;
1384 goto free_dma_mem_arr;
1385 }
1386
1387 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1388 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1389 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1390 break;
1391 }
1392
1393 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1394 dev_err(hdev->dev,
1395 "MSB of CPU accessible DMA memory are not identical in all range\n");
1396 rc = -EFAULT;
1397 goto free_dma_mem_arr;
1398 }
1399
1400 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1401 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1402 hdev->cpu_pci_msb_addr =
1403 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1404
1405 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1406
1407free_dma_mem_arr:
1408 for (j = 0 ; j < i ; j++)
1409 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1410 HL_CPU_ACCESSIBLE_MEM_SIZE,
1411 virt_addr_arr[j],
1412 dma_addr_arr[j]);
1413
1414 return rc;
1415}
1416
1417static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1418{
1419 struct gaudi_device *gaudi = hdev->asic_specific;
1420 struct gaudi_internal_qman_info *q;
1421 u32 i;
1422
1423 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1424 q = &gaudi->internal_qmans[i];
1425 if (!q->pq_kernel_addr)
1426 continue;
1427 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1428 q->pq_kernel_addr,
1429 q->pq_dma_addr);
1430 }
1431}
1432
1433static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1434{
1435 struct gaudi_device *gaudi = hdev->asic_specific;
1436 struct gaudi_internal_qman_info *q;
1437 int rc, i;
1438
1439 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1440 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1441 continue;
1442
1443 q = &gaudi->internal_qmans[i];
1444
1445 switch (i) {
Ofir Bitton0940cab2020-08-31 08:52:56 +03001446 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001447 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1448 break;
1449 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1450 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1451 break;
1452 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1453 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1454 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02001455 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1456 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1457 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001458 default:
1459 dev_err(hdev->dev, "Bad internal queue index %d", i);
1460 rc = -EINVAL;
1461 goto free_internal_qmans_pq_mem;
1462 }
1463
1464 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1465 hdev, q->pq_size,
1466 &q->pq_dma_addr,
1467 GFP_KERNEL | __GFP_ZERO);
1468 if (!q->pq_kernel_addr) {
1469 rc = -ENOMEM;
1470 goto free_internal_qmans_pq_mem;
1471 }
1472 }
1473
1474 return 0;
1475
1476free_internal_qmans_pq_mem:
1477 gaudi_free_internal_qmans_pq_mem(hdev);
1478 return rc;
1479}
1480
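/*
 * gaudi_sw_init() - software-only initialization: build the event map,
 * create the DMA pools, the CPU-accessible memory pool and the internal
 * QMAN PQ buffers.
 */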
1481static int gaudi_sw_init(struct hl_device *hdev)
1482{
1483 struct gaudi_device *gaudi;
Ofir Bittonebd8d122020-05-10 13:41:28 +03001484 u32 i, event_id = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001485 int rc;
1486
1487 /* Allocate device structure */
1488 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1489 if (!gaudi)
1490 return -ENOMEM;
1491
Ofir Bittonebd8d122020-05-10 13:41:28 +03001492 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1493 if (gaudi_irq_map_table[i].valid) {
1494 if (event_id == GAUDI_EVENT_SIZE) {
1495 dev_err(hdev->dev,
1496 "Event array exceeds the limit of %u events\n",
1497 GAUDI_EVENT_SIZE);
1498 rc = -EINVAL;
1499 goto free_gaudi_device;
1500 }
1501
1502 gaudi->events[event_id++] =
1503 gaudi_irq_map_table[i].fc_id;
1504 }
1505 }
1506
Oded Gabbay2f553422020-08-15 16:28:10 +03001507 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001508
1509 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1510
1511 hdev->asic_specific = gaudi;
1512
1513 /* Create DMA pool for small allocations */
1514 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1515 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1516 if (!hdev->dma_pool) {
1517 dev_err(hdev->dev, "failed to create DMA pool\n");
1518 rc = -ENOMEM;
1519 goto free_gaudi_device;
1520 }
1521
1522 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1523 if (rc)
1524 goto free_dma_pool;
1525
1526 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1527 if (!hdev->cpu_accessible_dma_pool) {
1528 dev_err(hdev->dev,
1529 "Failed to create CPU accessible DMA pool\n");
1530 rc = -ENOMEM;
1531 goto free_cpu_dma_mem;
1532 }
1533
1534 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1535 (uintptr_t) hdev->cpu_accessible_dma_mem,
1536 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1537 if (rc) {
1538 dev_err(hdev->dev,
1539 "Failed to add memory to CPU accessible DMA pool\n");
1540 rc = -EFAULT;
1541 goto free_cpu_accessible_dma_pool;
1542 }
1543
1544 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1545 if (rc)
1546 goto free_cpu_accessible_dma_pool;
1547
1548 spin_lock_init(&gaudi->hw_queues_lock);
1549 mutex_init(&gaudi->clk_gate_mutex);
1550
1551 hdev->supports_sync_stream = true;
1552 hdev->supports_coresight = true;
1553
1554 return 0;
1555
1556free_cpu_accessible_dma_pool:
1557 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1558free_cpu_dma_mem:
1559 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1560 hdev->cpu_pci_msb_addr);
1561 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1562 HL_CPU_ACCESSIBLE_MEM_SIZE,
1563 hdev->cpu_accessible_dma_mem,
1564 hdev->cpu_accessible_dma_address);
1565free_dma_pool:
1566 dma_pool_destroy(hdev->dma_pool);
1567free_gaudi_device:
1568 kfree(gaudi);
1569 return rc;
1570}
1571
1572static int gaudi_sw_fini(struct hl_device *hdev)
1573{
1574 struct gaudi_device *gaudi = hdev->asic_specific;
1575
1576 gaudi_free_internal_qmans_pq_mem(hdev);
1577
1578 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1579
1580 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1581 hdev->cpu_pci_msb_addr);
1582 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1583 HL_CPU_ACCESSIBLE_MEM_SIZE,
1584 hdev->cpu_accessible_dma_mem,
1585 hdev->cpu_accessible_dma_address);
1586
1587 dma_pool_destroy(hdev->dma_pool);
1588
1589 mutex_destroy(&gaudi->clk_gate_mutex);
1590
1591 kfree(gaudi);
1592
1593 return 0;
1594}
1595
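/*
 * Single-MSI mode interrupt handler: one shared vector covers all
 * completion queues and the event queue, so each of them is checked on
 * every interrupt.
 */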
1596static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1597{
1598 struct hl_device *hdev = arg;
1599 int i;
1600
1601 if (hdev->disabled)
1602 return IRQ_HANDLED;
1603
1604 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1605 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1606
1607 hl_irq_handler_eq(irq, &hdev->event_queue);
1608
1609 return IRQ_HANDLED;
1610}
1611
1612/*
1613 * For backward compatibility, new MSI interrupts should be set after the
1614 * existing CPU and NIC interrupts.
1615 */
1616static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1617 bool cpu_eq)
1618{
1619 int msi_vec;
1620
1621 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1622 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1623 GAUDI_EVENT_QUEUE_MSI_IDX);
1624
1625 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1626 (nr + NIC_NUMBER_OF_ENGINES + 1);
1627
1628 return pci_irq_vector(hdev->pdev, msi_vec);
1629}
1630
1631static int gaudi_enable_msi_single(struct hl_device *hdev)
1632{
1633 int rc, irq;
1634
1635 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1636
1637 irq = gaudi_pci_irq_vector(hdev, 0, false);
1638 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1639 "gaudi single msi", hdev);
1640 if (rc)
1641 dev_err(hdev->dev,
1642 "Failed to request single MSI IRQ\n");
1643
1644 return rc;
1645}
1646
1647static int gaudi_enable_msi_multi(struct hl_device *hdev)
1648{
1649 int cq_cnt = hdev->asic_prop.completion_queues_count;
1650 int rc, i, irq_cnt_init, irq;
1651
1652 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1653 irq = gaudi_pci_irq_vector(hdev, i, false);
1654 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1655 &hdev->completion_queue[i]);
1656 if (rc) {
1657 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1658 goto free_irqs;
1659 }
1660 }
1661
1662 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1663 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1664 &hdev->event_queue);
1665 if (rc) {
1666 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1667 goto free_irqs;
1668 }
1669
1670 return 0;
1671
1672free_irqs:
1673 for (i = 0 ; i < irq_cnt_init ; i++)
1674 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1675 &hdev->completion_queue[i]);
1676 return rc;
1677}
1678
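/*
 * Allocate MSI vectors and register the interrupt handlers. If fewer than
 * NUMBER_OF_INTERRUPTS vectors are granted, fall back to single-MSI mode;
 * otherwise use one vector per completion queue plus one for the CPU
 * event queue.
 */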
1679static int gaudi_enable_msi(struct hl_device *hdev)
1680{
1681 struct gaudi_device *gaudi = hdev->asic_specific;
1682 int rc;
1683
1684 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1685 return 0;
1686
1687 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1688 PCI_IRQ_MSI);
1689 if (rc < 0) {
1690 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1691 return rc;
1692 }
1693
1694 if (rc < NUMBER_OF_INTERRUPTS) {
1695 gaudi->multi_msi_mode = false;
1696 rc = gaudi_enable_msi_single(hdev);
1697 } else {
1698 gaudi->multi_msi_mode = true;
1699 rc = gaudi_enable_msi_multi(hdev);
1700 }
1701
1702 if (rc)
1703 goto free_pci_irq_vectors;
1704
1705 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1706
1707 return 0;
1708
1709free_pci_irq_vectors:
1710 pci_free_irq_vectors(hdev->pdev);
1711 return rc;
1712}
1713
1714static void gaudi_sync_irqs(struct hl_device *hdev)
1715{
1716 struct gaudi_device *gaudi = hdev->asic_specific;
1717 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1718
1719 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1720 return;
1721
1722 /* Wait for all pending IRQs to be finished */
1723 if (gaudi->multi_msi_mode) {
1724 for (i = 0 ; i < cq_cnt ; i++)
1725 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1726
1727 synchronize_irq(gaudi_pci_irq_vector(hdev,
1728 GAUDI_EVENT_QUEUE_MSI_IDX,
1729 true));
1730 } else {
1731 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1732 }
1733}
1734
1735static void gaudi_disable_msi(struct hl_device *hdev)
1736{
1737 struct gaudi_device *gaudi = hdev->asic_specific;
1738 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1739
1740 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1741 return;
1742
1743 gaudi_sync_irqs(hdev);
1744
1745 if (gaudi->multi_msi_mode) {
1746 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1747 true);
1748 free_irq(irq, &hdev->event_queue);
1749
1750 for (i = 0 ; i < cq_cnt ; i++) {
1751 irq = gaudi_pci_irq_vector(hdev, i, false);
1752 free_irq(irq, &hdev->completion_queue[i]);
1753 }
1754 } else {
1755 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1756 }
1757
1758 pci_free_irq_vectors(hdev->pdev);
1759
1760 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1761}
1762
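/*
 * Enable SRAM address scrambling on all NIF/SIF routers and DMA_IF
 * channels, unless it was already enabled or sram_scrambler_enable is
 * cleared.
 */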
1763static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1764{
1765 struct gaudi_device *gaudi = hdev->asic_specific;
1766
1767 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1768 return;
1769
1770 if (!hdev->sram_scrambler_enable)
1771 return;
1772
1773 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1774 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1775 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1776 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1777 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1778 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1779 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1780 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1781 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1782 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1783 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1784 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1785 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1786 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1787 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1788 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1789
1790 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1791 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1792 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1793 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1794 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1795 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1796 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1797 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1798 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1799 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1800 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1801 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1802 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1803 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1804 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1805 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1806
1807 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1808 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1809 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1810 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1811 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1812 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1813 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1814 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1815 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1816 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1817 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1818 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1819 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1820 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1821 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1822 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1823
1824 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1825}
1826
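/*
 * Enable HBM address scrambling on all NIF/SIF routers and DMA_IF
 * channels, unless it was already enabled or dram_scrambler_enable is
 * cleared.
 */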
1827static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1828{
1829 struct gaudi_device *gaudi = hdev->asic_specific;
1830
1831 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1832 return;
1833
1834 if (!hdev->dram_scrambler_enable)
1835 return;
1836
1837 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1838 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1839 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1840 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1841 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1842 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1843 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1844 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1845 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1846 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1847 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1848 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1849 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1850 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1851 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1852 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1853
1854 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1855 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1856 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1857 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1858 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1859 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1860 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1861 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1862 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1863 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1864 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1865 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1866 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1867 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1868 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1869 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1870
1871 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1872 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1873 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1874 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1875 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1876 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1877 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1878 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1879 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1880 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1881 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1882 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1883 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1884 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1885 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1886 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1887
1888 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1889}
1890
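/*
 * Configure end-to-end (E2E) credits for HBM and PCI traffic on every
 * SIF/NIF router and DMA_IF channel, then enable E2E on all of them. The
 * per-router WR/RD size values are fixed tuning constants; when HBM
 * scrambling is disabled, additional NL_HBM registers are programmed as
 * well.
 */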
1891static void gaudi_init_e2e(struct hl_device *hdev)
1892{
1893 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1894 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1895 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1896 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1897
1898 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1899 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1900 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1901 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1902
1903 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1904 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1905 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1906 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1907
1908 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1909 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1910 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1911 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1912
1913 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1914 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1915 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1916 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1917
1918 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1919 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1920 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1921 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1922
1923 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1924 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1925 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1926 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1927
1928 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1929 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1930 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1931 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1932
1933 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1934 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1935 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1936 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1937
1938 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1939 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1940 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1941 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1942
1943 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1944 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1945 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1946 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1947
1948 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1949 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1950 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1951 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1952
1953 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1954 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1955 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1956 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1957
1958 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1959 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1960 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1961 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1962
1963 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1964 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1965 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1966 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1967
1968 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1969 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1970 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1971 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1972
1973 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1974 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1975 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1976 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1977
1978 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1979 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1980 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1981 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1982
1983 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1984 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1985 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1986 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1987
1988 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1989 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1990 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1991 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1992
1993 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1994 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1995 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1996 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1997
1998 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1999 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2000 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2001 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2002
2003 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2004 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2005 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2006 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2007
2008 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2009 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2010 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2011 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2012
2013 if (!hdev->dram_scrambler_enable) {
2014 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2015 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2016 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2017 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2018
2019 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2020 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2021 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2022 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2023
2024 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2025 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2026 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2027 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2028
2029 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2030 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2031 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2032 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2033
2034 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2035 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2036 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2037 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2038
2039 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2040 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2041 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2042 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2043
2044 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2045 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2046 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2047 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2048
2049 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2050 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2051 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2052 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2053
2054 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2055 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2056 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2057 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2058
2059 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2060 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2061 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2062 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2063
2064 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2065 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2066 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2067 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2068
2069 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2070 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2071 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2072 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2073
2074 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2075 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2076 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2077 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2078
2079 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2080 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2081 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2082 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2083
2084 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2085 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2086 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2087 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2088
2089 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2090 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2091 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2092 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2093
2094 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2095 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2096 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2097 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2098
2099 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2100 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2101 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2102 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2103
2104 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2105 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2106 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2107 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2108
2109 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2110 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2111 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2112 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2113
2114 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2115 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2116 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2117 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2118
2119 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2120 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2121 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2122 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2123
2124 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2125 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2126 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2127 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2128
2129 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2130 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2131 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2132 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2133 }
2134
2135 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2136 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2137 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2138 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2139
2140 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2141 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2142 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2143 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2144
2145 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2146 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2147 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2148 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2149
2150 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2151 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2152 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2153 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2154
2155 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2156 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2157 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2158 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2159
2160 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2161 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2162 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2163 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2164
2165 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2166 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2167 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2168 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2169
2170 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2171 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2172 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2173 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2174
2175 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2176 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2177 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2178 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2179
2180 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2181 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2182 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2183 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2184
2185 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2186 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2187 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2188 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2189
2190 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2191 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2192 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2193 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2194
2195 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2196 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2197 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2198 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2199
2200 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2201 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2202 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2203 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2204
2205 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2206 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2207 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2208 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2209
2210 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2211 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2212 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2213 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2214
2215 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2216 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2217 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2218 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2219
2220 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2221 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2222 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2223 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2224
2225 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2226 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2227 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2228 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2229
2230 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2231 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2232 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2233 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2234
2235 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2236 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2237 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2238 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2239
2240 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2241 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2242 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2243 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2244
2245 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2246 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2247 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2248 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2249
2250 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2251 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2252 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2253 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2254}
2255
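/*
 * Program the HBM read/write credit counts for each DMA_IF quadrant and
 * enable read and write credit accounting on both HBM channels.
 */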
2256static void gaudi_init_hbm_cred(struct hl_device *hdev)
2257{
2258 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2259
2260 hbm0_wr = 0x33333333;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002261 hbm0_rd = 0x77777777;
Oded Gabbay0b168c82020-06-15 19:25:57 +03002262 hbm1_wr = 0x55555555;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002263 hbm1_rd = 0xDDDDDDDD;
2264
2265 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2266 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2267 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2268 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2269
2270 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2271 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2272 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2273 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2274
2275 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2276 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2277 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2278 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2279
2280 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2281 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2282 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2283 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2284
2285 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2286 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2287 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2288 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2289 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2290 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2291 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2292 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2293 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2294 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2295 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2296 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2297
2298 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2299 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2300 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2301 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2302 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2303 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2304 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2305 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2306 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2307 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2308 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2309 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2310}
2311
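/*
 * One-time "golden" register setup: E2E credits, HBM credits, TPC
 * interrupt masking and icache fetch configuration, clearing the first
 * 128 bytes of SRAM for tensor DMA, and the MME rollup counters. Clock
 * gating is disabled beforehand.
 */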
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002312static void gaudi_init_golden_registers(struct hl_device *hdev)
2313{
2314 u32 tpc_offset;
2315 int tpc_id, i;
2316
2317 gaudi_init_e2e(hdev);
2318
2319 gaudi_init_hbm_cred(hdev);
2320
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002321 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002322
2323 for (tpc_id = 0, tpc_offset = 0;
2324 tpc_id < TPC_NUMBER_OF_ENGINES;
2325 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2326 /* Mask all arithmetic interrupts from TPC */
2327 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2328 /* Set 16 cache lines */
2329 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2330 ICACHE_FETCH_LINE_NUM, 2);
2331 }
2332
2333 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2334 for (i = 0 ; i < 128 ; i += 8)
2335 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2336
2337 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2338 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2339 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2340 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002341}
2342
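/*
 * Configure one stream (qman_id) of a PCI DMA QMAN: PQ base/size and
 * PI/CI, LDMA offsets, and the sync-manager monitor/SOB message base
 * addresses. Stream 0 also carries the per-QMAN setup: RAZWI error IRQ,
 * arbiter error messages, ARB watchdog timeout and global protection.
 */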
2343static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2344 int qman_id, dma_addr_t qman_pq_addr)
2345{
2346 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2347 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2348 u32 q_off, dma_qm_offset;
2349 u32 dma_qm_err_cfg;
2350
2351 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2352
2353 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2354 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2355 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2356 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2357 so_base_en_lo = lower_32_bits(CFG_BASE +
2358 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2359 so_base_en_hi = upper_32_bits(CFG_BASE +
2360 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2361 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2362 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2363 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2364 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2365 so_base_ws_lo = lower_32_bits(CFG_BASE +
2366 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2367 so_base_ws_hi = upper_32_bits(CFG_BASE +
2368 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2369
2370 q_off = dma_qm_offset + qman_id * 4;
2371
2372 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2373 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2374
2375 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2376 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2377 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2378
Ofir Bitton25121d92020-09-24 08:22:58 +03002379 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2380 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2381 QMAN_LDMA_SRC_OFFSET);
2382 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2383 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002384
2385 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2386 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2387 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2388 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2389 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2390 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2391 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2392 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2393
Omer Shpigelmance043262020-06-16 17:56:27 +03002394 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2395
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002396 /* The following configuration is needed only once per QMAN */
2397 if (qman_id == 0) {
2398 /* Configure RAZWI IRQ */
2399 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2400 if (hdev->stop_on_err) {
2401 dma_qm_err_cfg |=
2402 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2403 }
2404
2405 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2406 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2407 lower_32_bits(CFG_BASE +
2408 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2409 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2410 upper_32_bits(CFG_BASE +
2411 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2412 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2413 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2414 dma_id);
2415
2416 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2417 QM_ARB_ERR_MSG_EN_MASK);
2418
2419 /* Increase ARB WDT to support streams architecture */
2420 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2421 GAUDI_ARB_WDT_TIMEOUT);
2422
2423 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2424 QMAN_EXTERNAL_MAKE_TRUSTED);
2425
2426 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2427 }
2428}
2429
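/*
 * Configure a DMA core engine: maximum outstanding reads and read size
 * (written as 0, i.e. the maximum allowed by the physical size), the LBW
 * outstanding limit (H3-2116 workaround), RAZWI error reporting, MMU
 * bypass for the secured channel, and finally enable the engine.
 */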
2430static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2431{
2432 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2433 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2434
2435 /* Set to maximum possible according to physical size */
2436 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2437 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2438
Oded Gabbayd1f36332020-09-14 09:26:54 +03002439 /* WA for H/W bug H3-2116 */
2440 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2441
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002442 /* STOP_ON bit implies no completion is sent for the operation in case of RAZWI */
2443 if (hdev->stop_on_err)
2444 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2445
2446 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2447 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2448 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2449 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2450 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2451 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2452 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2453 WREG32(mmDMA0_CORE_PROT + dma_offset,
2454 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2455 /* If the channel is secured, it should be in MMU bypass mode */
2456 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2457 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2458 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2459}
2460
2461static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2462 u32 enable_mask)
2463{
2464 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2465
2466 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2467}
2468
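/*
 * Initialize all PCI DMA channels: for each assigned engine, set up the
 * four QMAN streams (mapping kernel queues to completion queues and MSI
 * vectors), initialize the DMA core, and enable the QMAN.
 */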
2469static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2470{
2471 struct gaudi_device *gaudi = hdev->asic_specific;
2472 struct hl_hw_queue *q;
2473 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2474
2475 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2476 return;
2477
2478 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2479 dma_id = gaudi_dma_assignment[i];
2480 /*
2481 * For queues after the CPU Q, add 1 to get the correct
2482 * queue index. In addition, the CPU EQ and NIC IRQs must be
2483 * added in order to get the correct MSI register.
2484 */
2485 if (dma_id > 1) {
2486 cpu_skip = 1;
2487 nic_skip = NIC_NUMBER_OF_ENGINES;
2488 } else {
2489 cpu_skip = 0;
2490 nic_skip = 0;
2491 }
2492
2493 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2494 q_idx = 4 * dma_id + j + cpu_skip;
2495 q = &hdev->kernel_queues[q_idx];
2496 q->cq_id = cq_id++;
2497 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2498 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2499 q->bus_address);
2500 }
2501
2502 gaudi_init_dma_core(hdev, dma_id);
2503
2504 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2505 }
2506
2507 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2508}
2509
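/*
 * Configure one stream of an HBM DMA QMAN. Streams 0-3 program a PQ and
 * use the CPDMA LDMA offsets; stream 4 (the lower CP) uses the regular
 * LDMA offsets and carries the RAZWI/arbiter error configuration. DMA5
 * additionally gets CP_MSG_BASE 2/3 pointed at the W_S sync manager for
 * sync stream collective.
 */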
2510static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2511 int qman_id, u64 qman_base_addr)
2512{
Ofir Bitton5de406c2020-09-10 10:56:26 +03002513 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2514 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002515 u32 q_off, dma_qm_offset;
2516 u32 dma_qm_err_cfg;
2517
2518 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2519
Ofir Bitton5de406c2020-09-10 10:56:26 +03002520 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2521 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2522 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002523 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002524 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002525 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002526 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002527 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002528 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2529 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2530 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2531 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2532 so_base_ws_lo = lower_32_bits(CFG_BASE +
2533 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2534 so_base_ws_hi = upper_32_bits(CFG_BASE +
2535 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002536
2537 q_off = dma_qm_offset + qman_id * 4;
2538
2539 if (qman_id < 4) {
2540 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2541 lower_32_bits(qman_base_addr));
2542 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2543 upper_32_bits(qman_base_addr));
2544
2545 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2546 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2547 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2548
Ofir Bitton25121d92020-09-24 08:22:58 +03002549 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2550 QMAN_CPDMA_SIZE_OFFSET);
2551 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2552 QMAN_CPDMA_SRC_OFFSET);
2553 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2554 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002555 } else {
Ofir Bitton25121d92020-09-24 08:22:58 +03002556 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2557 QMAN_LDMA_SIZE_OFFSET);
2558 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2559 QMAN_LDMA_SRC_OFFSET);
2560 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
Oded Gabbay5b94d6e2020-09-25 20:14:15 +03002561 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002562
2563 /* Configure RAZWI IRQ */
2564 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2565 if (hdev->stop_on_err) {
2566 dma_qm_err_cfg |=
2567 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2568 }
2569 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2570
2571 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2572 lower_32_bits(CFG_BASE +
2573 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2574 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2575 upper_32_bits(CFG_BASE +
2576 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2577 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2578 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2579 dma_id);
2580
2581 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2582 QM_ARB_ERR_MSG_EN_MASK);
2583
2584 /* Increase ARB WDT to support streams architecture */
2585 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2586 GAUDI_ARB_WDT_TIMEOUT);
2587
2588 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2589 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2590 QMAN_INTERNAL_MAKE_TRUSTED);
2591 }
2592
Ofir Bitton5de406c2020-09-10 10:56:26 +03002593 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2594 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2595 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2596 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2597
2598 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2599 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2600 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2601 mtr_base_ws_lo);
2602 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2603 mtr_base_ws_hi);
2604 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2605 so_base_ws_lo);
2606 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2607 so_base_ws_hi);
2608 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002609}
2610
2611static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2612{
2613 struct gaudi_device *gaudi = hdev->asic_specific;
2614 struct gaudi_internal_qman_info *q;
2615 u64 qman_base_addr;
2616 int i, j, dma_id, internal_q_index;
2617
2618 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2619 return;
2620
2621 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2622 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2623
2624 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2625 /*
2626 * Add the CPU queue in order to get the correct queue
2627 * number, as all internal queues are placed after it
2628 */
2629 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2630
2631 q = &gaudi->internal_qmans[internal_q_index];
2632 qman_base_addr = (u64) q->pq_dma_addr;
2633 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2634 qman_base_addr);
2635 }
2636
2637 /* Initializing lower CP for HBM DMA QMAN */
2638 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2639
2640 gaudi_init_dma_core(hdev, dma_id);
2641
2642 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2643 }
2644
2645 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2646}
2647
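/*
 * Configure one stream of an MME QMAN, with the same split as the HBM DMA
 * flavor: streams 0-3 program a PQ with CPDMA offsets, stream 4 (the lower
 * CP) uses LDMA offsets and carries the RAZWI/arbiter error configuration.
 */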
2648static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2649 int qman_id, u64 qman_base_addr)
2650{
2651 u32 mtr_base_lo, mtr_base_hi;
2652 u32 so_base_lo, so_base_hi;
2653 u32 q_off, mme_id;
2654 u32 mme_qm_err_cfg;
2655
2656 mtr_base_lo = lower_32_bits(CFG_BASE +
2657 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2658 mtr_base_hi = upper_32_bits(CFG_BASE +
2659 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2660 so_base_lo = lower_32_bits(CFG_BASE +
2661 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2662 so_base_hi = upper_32_bits(CFG_BASE +
2663 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2664
2665 q_off = mme_offset + qman_id * 4;
2666
2667 if (qman_id < 4) {
2668 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2669 lower_32_bits(qman_base_addr));
2670 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2671 upper_32_bits(qman_base_addr));
2672
2673 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2674 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2675 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2676
Ofir Bitton25121d92020-09-24 08:22:58 +03002677 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2678 QMAN_CPDMA_SIZE_OFFSET);
2679 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2680 QMAN_CPDMA_SRC_OFFSET);
2681 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2682 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002683 } else {
Ofir Bitton25121d92020-09-24 08:22:58 +03002684 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2685 QMAN_LDMA_SIZE_OFFSET);
2686 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2687 QMAN_LDMA_SRC_OFFSET);
2688 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2689 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002690
2691 /* Configure RAZWI IRQ */
2692 mme_id = mme_offset /
2693 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2694
2695 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2696 if (hdev->stop_on_err) {
2697 mme_qm_err_cfg |=
2698 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2699 }
2700 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2701 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2702 lower_32_bits(CFG_BASE +
2703 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2704 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2705 upper_32_bits(CFG_BASE +
2706 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2707 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2708 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2709 mme_id);
2710
2711 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2712 QM_ARB_ERR_MSG_EN_MASK);
2713
2714 /* Increase ARB WDT to support streams architecture */
2715 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2716 GAUDI_ARB_WDT_TIMEOUT);
2717
2718 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2719 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2720 QMAN_INTERNAL_MAKE_TRUSTED);
2721 }
2722
2723 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2724 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2725 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2726 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2727}
2728
2729static void gaudi_init_mme_qmans(struct hl_device *hdev)
2730{
2731 struct gaudi_device *gaudi = hdev->asic_specific;
2732 struct gaudi_internal_qman_info *q;
2733 u64 qman_base_addr;
2734 u32 mme_offset;
2735 int i, internal_q_index;
2736
2737 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2738 return;
2739
2740 /*
2741 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2742 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2743 */
2744
2745 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2746
2747 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2748 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2749 q = &gaudi->internal_qmans[internal_q_index];
2750 qman_base_addr = (u64) q->pq_dma_addr;
2751 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2752 qman_base_addr);
2753 if (i == 3)
2754 mme_offset = 0;
2755 }
2756
2757 /* Initializing lower CP for MME QMANs */
2758 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2759 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2760 gaudi_init_mme_qman(hdev, 0, 4, 0);
2761
2762 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2763 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2764
2765 gaudi->hw_cap_initialized |= HW_CAP_MME;
2766}
2767
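/*
 * Configure one stream of a TPC QMAN: streams 0-3 program a PQ with CPDMA
 * offsets, stream 4 (the lower CP) uses LDMA offsets and carries the
 * RAZWI/arbiter error configuration. The TPC selected for sync stream
 * collective (tpc_id 6) also gets CP_MSG_BASE 2/3 pointed at the W_S sync
 * manager.
 */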
2768static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2769 int qman_id, u64 qman_base_addr)
2770{
Ofir Bitton5de406c2020-09-10 10:56:26 +03002771 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2772 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002773 u32 q_off, tpc_id;
2774 u32 tpc_qm_err_cfg;
2775
Ofir Bitton5de406c2020-09-10 10:56:26 +03002776 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2777 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2778 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002779 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002780 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002781 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002782 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002783 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002784 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2785 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2786 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2787 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2788 so_base_ws_lo = lower_32_bits(CFG_BASE +
2789 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2790 so_base_ws_hi = upper_32_bits(CFG_BASE +
2791 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002792
2793 q_off = tpc_offset + qman_id * 4;
2794
Ofir Bitton5de406c2020-09-10 10:56:26 +03002795 tpc_id = tpc_offset /
2796 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2797
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002798 if (qman_id < 4) {
2799 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2800 lower_32_bits(qman_base_addr));
2801 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2802 upper_32_bits(qman_base_addr));
2803
2804 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2805 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2806 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2807
Ofir Bitton25121d92020-09-24 08:22:58 +03002808 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2809 QMAN_CPDMA_SIZE_OFFSET);
2810 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2811 QMAN_CPDMA_SRC_OFFSET);
2812 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2813 QMAN_CPDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002814 } else {
Ofir Bitton25121d92020-09-24 08:22:58 +03002815 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2816 QMAN_LDMA_SIZE_OFFSET);
2817 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2818 QMAN_LDMA_SRC_OFFSET);
2819 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2820 QMAN_LDMA_DST_OFFSET);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002821
2822 /* Configure RAZWI IRQ */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002823 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2824 if (hdev->stop_on_err) {
2825 tpc_qm_err_cfg |=
2826 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2827 }
2828
2829 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2830 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2831 lower_32_bits(CFG_BASE +
2832 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2833 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2834 upper_32_bits(CFG_BASE +
2835 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2836 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2837 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2838 tpc_id);
2839
2840 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2841 QM_ARB_ERR_MSG_EN_MASK);
2842
2843 /* Increase ARB WDT to support streams architecture */
2844 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2845 GAUDI_ARB_WDT_TIMEOUT);
2846
2847 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2848 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2849 QMAN_INTERNAL_MAKE_TRUSTED);
2850 }
2851
Ofir Bitton5de406c2020-09-10 10:56:26 +03002852 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2853 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2854 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2855 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2856
2857 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
2858 if (tpc_id == 6) {
2859 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2860 mtr_base_ws_lo);
2861 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2862 mtr_base_ws_hi);
2863 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2864 so_base_ws_lo);
2865 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2866 so_base_ws_hi);
2867 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002868}
2869
2870static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2871{
2872 struct gaudi_device *gaudi = hdev->asic_specific;
2873 struct gaudi_internal_qman_info *q;
2874 u64 qman_base_addr;
2875 u32 so_base_hi, tpc_offset = 0;
2876 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2877 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2878 int i, tpc_id, internal_q_index;
2879
2880 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2881 return;
2882
2883 so_base_hi = upper_32_bits(CFG_BASE +
2884 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885
2886 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2887 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2888 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2889 tpc_id * QMAN_STREAMS + i;
2890 q = &gaudi->internal_qmans[internal_q_index];
2891 qman_base_addr = (u64) q->pq_dma_addr;
2892 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2893 qman_base_addr);
2894
2895 if (i == 3) {
2896 /* Initializing lower CP for TPC QMAN */
2897 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2898
2899 /* Enable the QMAN and TPC channel */
2900 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2901 QMAN_TPC_ENABLE);
2902 }
2903 }
2904
2905 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2906 so_base_hi);
2907
2908 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2909
Oded Gabbay65887292020-08-12 11:21:01 +03002910 gaudi->hw_cap_initialized |=
2911 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002912 }
2913}
2914
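/*
 * Configure one stream of a NIC QMAN: PQ base/size, LDMA offsets, and the
 * E_N and W_S sync-manager message base addresses. Stream 0 also carries
 * the RAZWI/arbiter error configuration.
 */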
Oded Gabbay3c681572020-11-02 21:10:39 +02002915static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
2916 int qman_id, u64 qman_base_addr, int nic_id)
2917{
Ofir Bitton5de406c2020-09-10 10:56:26 +03002918 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2919 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
Oded Gabbay3c681572020-11-02 21:10:39 +02002920 u32 q_off;
2921 u32 nic_qm_err_cfg;
2922
Ofir Bitton5de406c2020-09-10 10:56:26 +03002923 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2924 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2925 mtr_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02002926 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002927 so_base_en_lo = lower_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02002928 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002929 so_base_en_hi = upper_32_bits(CFG_BASE +
Oded Gabbay3c681572020-11-02 21:10:39 +02002930 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
Ofir Bitton5de406c2020-09-10 10:56:26 +03002931 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2932 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2933 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2934 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2935 so_base_ws_lo = lower_32_bits(CFG_BASE +
2936 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2937 so_base_ws_hi = upper_32_bits(CFG_BASE +
2938 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
Oded Gabbay3c681572020-11-02 21:10:39 +02002939
2940 q_off = nic_offset + qman_id * 4;
2941
2942 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
2943 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
2944
2945 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
2946 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
2947 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
2948
Ofir Bitton5de406c2020-09-10 10:56:26 +03002949 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2950 QMAN_LDMA_SIZE_OFFSET);
2951 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2952 QMAN_LDMA_SRC_OFFSET);
2953 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2954 QMAN_LDMA_DST_OFFSET);
Oded Gabbay3c681572020-11-02 21:10:39 +02002955
Ofir Bitton5de406c2020-09-10 10:56:26 +03002956 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2957 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2958 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2959 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2960
2961 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
2962 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2963 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2964 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2965 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
Oded Gabbay3c681572020-11-02 21:10:39 +02002966
2967 if (qman_id == 0) {
2968 /* Configure RAZWI IRQ */
2969 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2970 if (hdev->stop_on_err) {
2971 nic_qm_err_cfg |=
2972 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2973 }
2974
2975 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
2976 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
2977 lower_32_bits(CFG_BASE +
2978 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2979 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
2980 upper_32_bits(CFG_BASE +
2981 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2982 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
2983 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
2984 nic_id);
2985
2986 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
2987 QM_ARB_ERR_MSG_EN_MASK);
2988
2989 /* Increase ARB WDT to support streams architecture */
2990 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
2991 GAUDI_ARB_WDT_TIMEOUT);
2992
2993 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
2994 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
2995 QMAN_INTERNAL_MAKE_TRUSTED);
2996 }
2997}
2998
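/*
 * Initialize the QMANs of all NIC engines enabled in hdev->nic_ports_mask,
 * skipping disabled ports while still advancing the register offset.
 */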
2999static void gaudi_init_nic_qmans(struct hl_device *hdev)
3000{
3001 struct gaudi_device *gaudi = hdev->asic_specific;
3002 struct gaudi_internal_qman_info *q;
3003 u64 qman_base_addr;
3004 u32 nic_offset = 0;
3005 u32 nic_delta_between_qmans =
3006 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3007 u32 nic_delta_between_nics =
3008 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3009 int i, nic_id, internal_q_index;
3010
3011 if (!hdev->nic_ports_mask)
3012 return;
3013
3014 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3015 return;
3016
3017 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3018
3019 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3020 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3021 nic_offset += nic_delta_between_qmans;
3022 if (nic_id & 1) {
3023 nic_offset -= (nic_delta_between_qmans * 2);
3024 nic_offset += nic_delta_between_nics;
3025 }
3026 continue;
3027 }
3028
3029 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3030 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3031 nic_id * QMAN_STREAMS + i;
3032 q = &gaudi->internal_qmans[internal_q_index];
3033 qman_base_addr = (u64) q->pq_dma_addr;
3034 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3035 qman_base_addr, nic_id);
3036 }
3037
3038 /* Enable the QMAN */
3039 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3040
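		/* Each NIC macro holds two QMANs: advance to the next QMAN,
		 * and after the second (odd) one rewind and jump to the first
		 * QMAN of the next NIC macro
		 */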
3041 nic_offset += nic_delta_between_qmans;
3042 if (nic_id & 1) {
3043 nic_offset -= (nic_delta_between_qmans * 2);
3044 nic_offset += nic_delta_between_nics;
3045 }
3046
3047 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3048 }
3049}
3050
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003051static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3052{
3053 struct gaudi_device *gaudi = hdev->asic_specific;
3054
3055 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3056 return;
3057
3058 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3059 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3060 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3061}
3062
3063static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3064{
3065 struct gaudi_device *gaudi = hdev->asic_specific;
3066
3067 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3068 return;
3069
3070 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3071 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3072 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3073 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3074 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3075}
3076
3077static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3078{
3079 struct gaudi_device *gaudi = hdev->asic_specific;
3080
3081 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3082 return;
3083
3084 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3085 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3086}
3087
3088static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3089{
3090 struct gaudi_device *gaudi = hdev->asic_specific;
3091 u32 tpc_offset = 0;
3092 int tpc_id;
3093
3094 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3095 return;
3096
3097 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3098 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3099 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3100 }
3101}
3102
Oded Gabbay3c681572020-11-02 21:10:39 +02003103static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3104{
3105 struct gaudi_device *gaudi = hdev->asic_specific;
3106 u32 nic_mask, nic_offset = 0;
3107 u32 nic_delta_between_qmans =
3108 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3109 u32 nic_delta_between_nics =
3110 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3111 int nic_id;
3112
3113 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3114 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3115
3116 if (gaudi->hw_cap_initialized & nic_mask)
3117 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3118
3119 nic_offset += nic_delta_between_qmans;
3120 if (nic_id & 1) {
3121 nic_offset -= (nic_delta_between_qmans * 2);
3122 nic_offset += nic_delta_between_nics;
3123 }
3124 }
3125}
3126
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003127static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3128{
3129 struct gaudi_device *gaudi = hdev->asic_specific;
3130
3131 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3132 return;
3133
3134 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3135 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3136 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3137 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3138}
3139
3140static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3141{
3142 struct gaudi_device *gaudi = hdev->asic_specific;
3143
3144 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3145 return;
3146
3147 /* Stop CPs of HBM DMA QMANs */
3148
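	/* 0x1F presumably covers all five CPs of each QMAN (the four upper
	 * stream CPs plus the lower CP), unlike the PCI DMA QMANs above
	 * where only the upper CPs are stopped
	 */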
3149 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3150 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3151 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3152 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3153 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3154}
3155
3156static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3157{
3158 struct gaudi_device *gaudi = hdev->asic_specific;
3159
3160 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3161 return;
3162
3163 /* Stop CPs of MME QMANs */
3164 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3165 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3166}
3167
3168static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3169{
3170 struct gaudi_device *gaudi = hdev->asic_specific;
3171
3172 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3173 return;
3174
3175 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3176 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3177 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3178 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3179 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3180 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3181 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3182 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3183}
3184
Oded Gabbay3c681572020-11-02 21:10:39 +02003185static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3186{
3187 struct gaudi_device *gaudi = hdev->asic_specific;
3188
3189	/* Stop the PQ fetchers, CQ fetchers and CPs of the NIC QMANs */
3190
3191 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3192 WREG32(mmNIC0_QM0_GLBL_CFG1,
3193 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3194 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3195 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3196
3197 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3198 WREG32(mmNIC0_QM1_GLBL_CFG1,
3199 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3200 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3201 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3202
3203 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3204 WREG32(mmNIC1_QM0_GLBL_CFG1,
3205 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3206 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3207 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3208
3209 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3210 WREG32(mmNIC1_QM1_GLBL_CFG1,
3211 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3212 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3213 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3214
3215 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3216 WREG32(mmNIC2_QM0_GLBL_CFG1,
3217 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3218 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3219 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3220
3221 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3222 WREG32(mmNIC2_QM1_GLBL_CFG1,
3223 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3224 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3225 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3226
3227 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3228 WREG32(mmNIC3_QM0_GLBL_CFG1,
3229 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3230 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3231 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3232
3233 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3234 WREG32(mmNIC3_QM1_GLBL_CFG1,
3235 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3236 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3237 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3238
3239 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3240 WREG32(mmNIC4_QM0_GLBL_CFG1,
3241 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3242 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3243 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3244
3245 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3246 WREG32(mmNIC4_QM1_GLBL_CFG1,
3247 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3248 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3249 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3250}
3251
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003252static void gaudi_pci_dma_stall(struct hl_device *hdev)
3253{
3254 struct gaudi_device *gaudi = hdev->asic_specific;
3255
3256 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3257 return;
3258
3259 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3260 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3261 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3262}
3263
3264static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3265{
3266 struct gaudi_device *gaudi = hdev->asic_specific;
3267
3268 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3269 return;
3270
3271 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3272 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3273 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3274 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3275 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3276}
3277
3278static void gaudi_mme_stall(struct hl_device *hdev)
3279{
3280 struct gaudi_device *gaudi = hdev->asic_specific;
3281
3282 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3283 return;
3284
3285 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3286 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3287 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3288 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3289 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3290 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3291 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3292 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3293 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3294 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3295 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3296 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3297 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3298 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3299 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3300 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3301 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3302}
3303
3304static void gaudi_tpc_stall(struct hl_device *hdev)
3305{
3306 struct gaudi_device *gaudi = hdev->asic_specific;
3307
3308 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3309 return;
3310
3311 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3312 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3313 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3314 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3315 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3316 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3317 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3318 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3319}
3320
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003321static void gaudi_set_clock_gating(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003322{
3323 struct gaudi_device *gaudi = hdev->asic_specific;
3324 u32 qman_offset;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003325 bool enable;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003326 int i;
3327
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003328	/* If we are in a debug session, don't enable clock gating
3329	 * as it may interfere
3330 */
3331 if (hdev->in_debug)
3332 return;
3333
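	/* Enable clock gating per engine, only if the engine's bit is set
	 * in the clock_gating_mask
	 */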
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003334 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003335 enable = !!(hdev->clock_gating_mask &
3336 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003337
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003338 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003339 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3340 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003341 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003342 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003343 }
3344
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003345 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003346 enable = !!(hdev->clock_gating_mask &
3347 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003348
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003349 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003350 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3351 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003352 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003353 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003354 }
3355
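	/* Only the MME0 and MME2 QMANs are configured by the driver (MME1
	 * and MME3 seem to act as slaves of their neighboring MMEs), so
	 * only their CGM registers are touched here
	 */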
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003356 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3357 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3358 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003359
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003360 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3361 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3362 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003363
3364 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003365 enable = !!(hdev->clock_gating_mask &
3366 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003367
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003368 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003369 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003370 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03003371 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003372
3373 qman_offset += TPC_QMAN_OFFSET;
3374 }
3375
3376 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3377}
3378
3379static void gaudi_disable_clock_gating(struct hl_device *hdev)
3380{
3381 struct gaudi_device *gaudi = hdev->asic_specific;
3382 u32 qman_offset;
3383 int i;
3384
3385 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
3386 return;
3387
3388 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3389 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3390 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3391
3392 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3393 }
3394
3395 WREG32(mmMME0_QM_CGM_CFG, 0);
3396 WREG32(mmMME0_QM_CGM_CFG1, 0);
3397 WREG32(mmMME2_QM_CGM_CFG, 0);
3398 WREG32(mmMME2_QM_CGM_CFG1, 0);
3399
3400 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3401 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3402 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3403
3404 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3405 }
3406
3407 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3408}
3409
3410static void gaudi_enable_timestamp(struct hl_device *hdev)
3411{
3412 /* Disable the timestamp counter */
3413 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3414
3415 /* Zero the lower/upper parts of the 64-bit counter */
3416 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3417 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3418
3419 /* Enable the counter */
3420 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3421}
3422
3423static void gaudi_disable_timestamp(struct hl_device *hdev)
3424{
3425 /* Disable the timestamp counter */
3426 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3427}
3428
3429static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3430{
Oded Gabbayc83c4172020-07-05 15:48:34 +03003431 u32 wait_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003432
3433 dev_info(hdev->dev,
3434 "Halting compute engines and disabling interrupts\n");
3435
Oded Gabbayc83c4172020-07-05 15:48:34 +03003436 if (hdev->pldm)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003437 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003438 else
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003439 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003440
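	/* Halt flow: first stop the QMANs from fetching new work, then
	 * stall the engines themselves, and only then disable the QMANs
	 */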
Oded Gabbay3c681572020-11-02 21:10:39 +02003441 gaudi_stop_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003442
3443 gaudi_stop_mme_qmans(hdev);
3444 gaudi_stop_tpc_qmans(hdev);
3445 gaudi_stop_hbm_dma_qmans(hdev);
3446 gaudi_stop_pci_dma_qmans(hdev);
3447
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003448 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003449
3450 msleep(wait_timeout_ms);
3451
3452 gaudi_pci_dma_stall(hdev);
3453 gaudi_hbm_dma_stall(hdev);
3454 gaudi_tpc_stall(hdev);
3455 gaudi_mme_stall(hdev);
3456
3457 msleep(wait_timeout_ms);
3458
Oded Gabbay3c681572020-11-02 21:10:39 +02003459 gaudi_disable_nic_qmans(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003460 gaudi_disable_mme_qmans(hdev);
3461 gaudi_disable_tpc_qmans(hdev);
3462 gaudi_disable_hbm_dma_qmans(hdev);
3463 gaudi_disable_pci_dma_qmans(hdev);
3464
3465 gaudi_disable_timestamp(hdev);
3466
Oded Gabbay12ae3132020-07-03 20:58:23 +03003467 gaudi_disable_msi(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003468}
3469
3470static int gaudi_mmu_init(struct hl_device *hdev)
3471{
3472 struct asic_fixed_properties *prop = &hdev->asic_prop;
3473 struct gaudi_device *gaudi = hdev->asic_specific;
3474 u64 hop0_addr;
3475 int rc, i;
3476
3477 if (!hdev->mmu_enable)
3478 return 0;
3479
3480 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3481 return 0;
3482
3483 hdev->dram_supports_virtual_memory = false;
3484
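	/* Assign a hop0 (top-level) page table to each ASID; the tables are
	 * laid out consecutively, one hop table size apart, in the MMU
	 * page-tables area
	 */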
3485 for (i = 0 ; i < prop->max_asid ; i++) {
3486 hop0_addr = prop->mmu_pgt_addr +
3487 (i * prop->mmu_hop_table_size);
3488
3489 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3490 if (rc) {
3491 dev_err(hdev->dev,
3492 "failed to set hop0 addr for asid %d\n", i);
3493 goto err;
3494 }
3495 }
3496
3497 /* init MMU cache manage page */
3498 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3499 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3500
Tomer Tayar644883e2020-07-19 11:00:03 +03003501 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003502
3503 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3504 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3505
3506 WREG32(mmSTLB_HOP_CONFIGURATION,
3507 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3508
Omer Shpigelmancfd41762020-06-03 13:03:35 +03003509 /*
3510 * The H/W expects the first PI after init to be 1. After wraparound
3511 * we'll write 0.
3512 */
3513 gaudi->mmu_cache_inv_pi = 1;
3514
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003515 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3516
3517 return 0;
3518
3519err:
3520 return rc;
3521}
3522
3523static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3524{
3525 void __iomem *dst;
3526
3527 /* HBM scrambler must be initialized before pushing F/W to HBM */
3528 gaudi_init_scrambler_hbm(hdev);
3529
3530 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3531
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003532 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003533}
3534
3535static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3536{
3537 void __iomem *dst;
3538
3539 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3540
Ofir Bitton9bb86b62020-10-20 10:45:37 +03003541 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003542}
3543
3544static void gaudi_read_device_fw_version(struct hl_device *hdev,
3545 enum hl_fw_component fwc)
3546{
3547 const char *name;
3548 u32 ver_off;
3549 char *dest;
3550
3551 switch (fwc) {
3552 case FW_COMP_UBOOT:
3553 ver_off = RREG32(mmUBOOT_VER_OFFSET);
3554 dest = hdev->asic_prop.uboot_ver;
3555 name = "U-Boot";
3556 break;
3557 case FW_COMP_PREBOOT:
3558 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3559 dest = hdev->asic_prop.preboot_ver;
3560 name = "Preboot";
3561 break;
3562 default:
3563 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
3564 return;
3565 }
3566
3567 ver_off &= ~((u32)SRAM_BASE_ADDR);
3568
3569 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3570 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3571 VERSION_MAX_LEN);
3572 } else {
3573 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3574 name, ver_off);
3575 strcpy(dest, "unavailable");
3576 }
3577}
3578
3579static int gaudi_init_cpu(struct hl_device *hdev)
3580{
3581 struct gaudi_device *gaudi = hdev->asic_specific;
3582 int rc;
3583
3584 if (!hdev->cpu_enable)
3585 return 0;
3586
3587 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3588 return 0;
3589
3590 /*
3591	 * The device CPU works with 40-bit addresses.
3592	 * This register sets the extension to 50 bits.
3593 */
3594 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3595
3596 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3597 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3598 mmCPU_CMD_STATUS_TO_HOST,
3599 mmCPU_BOOT_ERR0,
3600 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3601 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3602
3603 if (rc)
3604 return rc;
3605
3606 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3607
3608 return 0;
3609}
3610
3611static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3612{
3613 struct gaudi_device *gaudi = hdev->asic_specific;
3614 struct hl_eq *eq;
3615 u32 status;
3616 struct hl_hw_queue *cpu_pq =
3617 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3618 int err;
3619
3620 if (!hdev->cpu_queues_enable)
3621 return 0;
3622
3623 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3624 return 0;
3625
3626 eq = &hdev->event_queue;
3627
3628 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3629 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3630
3631 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3632 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3633
3634 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3635 lower_32_bits(hdev->cpu_accessible_dma_address));
3636 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3637 upper_32_bits(hdev->cpu_accessible_dma_address));
3638
3639 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3640 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3641 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3642
3643 /* Used for EQ CI */
3644 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3645
3646 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3647
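	/* Let the device CPU know the queues are ready and wait below for
	 * it to acknowledge with PQ_INIT_STATUS_READY_FOR_HOST
	 */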
3648 if (gaudi->multi_msi_mode)
3649 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3650 else
3651 WREG32(mmCPU_IF_QUEUE_INIT,
3652 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3653
3654 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3655
3656 err = hl_poll_timeout(
3657 hdev,
3658 mmCPU_IF_QUEUE_INIT,
3659 status,
3660 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3661 1000,
3662 cpu_timeout);
3663
3664 if (err) {
3665 dev_err(hdev->dev,
Oded Gabbay6138bbe2020-09-04 20:18:16 +03003666 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003667 return -EIO;
3668 }
3669
3670 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3671 return 0;
3672}
3673
3674static void gaudi_pre_hw_init(struct hl_device *hdev)
3675{
3676 /* Perform read from the device to make sure device is up */
3677 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3678
Oded Gabbay57799ce2020-09-13 15:51:28 +03003679 /* Set the access through PCI bars (Linux driver only) as
3680 * secured
3681 */
3682 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3683 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3684 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3685
3686 /* Perform read to flush the waiting writes to ensure
3687 * configuration was set in the device
3688 */
3689 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3690
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003691 /*
3692 * Let's mark in the H/W that we have reached this point. We check
3693 * this value in the reset_before_init function to understand whether
3694 * we need to reset the chip before doing H/W init. This register is
3695 * cleared by the H/W upon H/W reset
3696 */
3697 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3698
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003699 /* Configure the reset registers. Must be done as early as possible
3700 * in case we fail during H/W initialization
3701 */
3702 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
3703 (CFG_RST_H_DMA_MASK |
3704 CFG_RST_H_MME_MASK |
3705 CFG_RST_H_SM_MASK |
Oded Gabbay65887292020-08-12 11:21:01 +03003706 CFG_RST_H_TPC_7_MASK));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003707
3708 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
3709
3710 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
3711 (CFG_RST_H_HBM_MASK |
Oded Gabbay65887292020-08-12 11:21:01 +03003712 CFG_RST_H_TPC_7_MASK |
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003713 CFG_RST_H_NIC_MASK |
3714 CFG_RST_H_SM_MASK |
3715 CFG_RST_H_DMA_MASK |
3716 CFG_RST_H_MME_MASK |
3717 CFG_RST_H_CPU_MASK |
3718 CFG_RST_H_MMU_MASK));
3719
3720 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
3721 (CFG_RST_L_IF_MASK |
3722 CFG_RST_L_PSOC_MASK |
3723 CFG_RST_L_TPC_MASK));
3724}
3725
3726static int gaudi_hw_init(struct hl_device *hdev)
3727{
3728 int rc;
3729
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003730 gaudi_pre_hw_init(hdev);
3731
3732 gaudi_init_pci_dma_qmans(hdev);
3733
3734 gaudi_init_hbm_dma_qmans(hdev);
3735
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003736 rc = gaudi_init_cpu(hdev);
3737 if (rc) {
3738 dev_err(hdev->dev, "failed to initialize CPU\n");
3739 return rc;
3740 }
3741
3742 /* SRAM scrambler must be initialized after CPU is running from HBM */
3743 gaudi_init_scrambler_sram(hdev);
3744
3745	/* This is done here just in case we are working without the CPU */
3746 gaudi_init_scrambler_hbm(hdev);
3747
3748 gaudi_init_golden_registers(hdev);
3749
3750 rc = gaudi_mmu_init(hdev);
3751 if (rc)
3752 return rc;
3753
Omer Shpigelman3a3a5bf12020-05-11 10:45:12 +03003754 gaudi_init_security(hdev);
3755
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003756 gaudi_init_mme_qmans(hdev);
3757
3758 gaudi_init_tpc_qmans(hdev);
3759
Oded Gabbay3c681572020-11-02 21:10:39 +02003760 gaudi_init_nic_qmans(hdev);
3761
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003762 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003763
3764 gaudi_enable_timestamp(hdev);
3765
Oded Gabbay3c681572020-11-02 21:10:39 +02003766 /* MSI must be enabled before CPU queues and NIC are initialized */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003767 rc = gaudi_enable_msi(hdev);
3768 if (rc)
3769 goto disable_queues;
3770
3771 /* must be called after MSI was enabled */
3772 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3773 if (rc) {
3774 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3775 rc);
3776 goto disable_msi;
3777 }
3778
3779 /* Perform read from the device to flush all configuration */
3780 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3781
3782 return 0;
3783
3784disable_msi:
3785 gaudi_disable_msi(hdev);
3786disable_queues:
3787 gaudi_disable_mme_qmans(hdev);
3788 gaudi_disable_pci_dma_qmans(hdev);
3789
3790 return rc;
3791}
3792
3793static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3794{
3795 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003796 u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003797
Oded Gabbay12ae3132020-07-03 20:58:23 +03003798 if (!hard_reset) {
3799 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3800 return;
3801 }
3802
Oded Gabbayc83c4172020-07-05 15:48:34 +03003803 if (hdev->pldm) {
Oded Gabbay12ae3132020-07-03 20:58:23 +03003804 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003805 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3806 } else {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003807 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003808 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3809 }
3810
3811	/* Set the device to handle FLR by H/W, as we are going to put the
3812	 * device CPU in halt mode
3813 */
3814 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3815 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3816
3817	/* We don't know the state of the CPU, so make sure it is
3818	 * stopped by any means necessary
3819 */
3820 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3821 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3822
3823 msleep(cpu_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003824
Oded Gabbay12ae3132020-07-03 20:58:23 +03003825 /* Tell ASIC not to re-initialize PCIe */
3826 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003827
Oded Gabbay12ae3132020-07-03 20:58:23 +03003828 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003829
Oded Gabbay12ae3132020-07-03 20:58:23 +03003830 /* H/W bug WA:
3831 * rdata[31:0] = strap_read_val;
3832 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3833 */
3834 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3835 (boot_strap & 0x001FFFFF));
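	/* Bit 1 is cleared only for the duration of the reset; the strap
	 * value is written back below once the reset flow completes
	 */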
3836 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003837
Oded Gabbay12ae3132020-07-03 20:58:23 +03003838 /* Restart BTL/BLR upon hard-reset */
3839 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003840
Oded Gabbay12ae3132020-07-03 20:58:23 +03003841 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3842 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3843 dev_info(hdev->dev,
3844 "Issued HARD reset command, going to wait %dms\n",
3845 reset_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003846
3847 /*
3848 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3849 * itself is in reset. Need to wait until the reset is deasserted
3850 */
3851 msleep(reset_timeout_ms);
3852
3853 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3854 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3855 dev_err(hdev->dev,
3856 "Timeout while waiting for device to reset 0x%x\n",
3857 status);
3858
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003859 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3860
3861 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3862 HW_CAP_HBM | HW_CAP_PCI_DMA |
3863 HW_CAP_MME | HW_CAP_TPC_MASK |
3864 HW_CAP_HBM_DMA | HW_CAP_PLL |
Oded Gabbay3c681572020-11-02 21:10:39 +02003865 HW_CAP_NIC_MASK | HW_CAP_MMU |
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003866 HW_CAP_SRAM_SCRAMBLER |
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003867 HW_CAP_HBM_SCRAMBLER |
3868 HW_CAP_CLK_GATE);
3869
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003870 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3871}
3872
3873static int gaudi_suspend(struct hl_device *hdev)
3874{
3875 int rc;
3876
Oded Gabbay2f553422020-08-15 16:28:10 +03003877 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003878 if (rc)
3879 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3880
3881 return rc;
3882}
3883
3884static int gaudi_resume(struct hl_device *hdev)
3885{
3886 return gaudi_init_iatu(hdev);
3887}
3888
3889static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
Hillf Danton0db57532020-08-23 07:32:42 +08003890 void *cpu_addr, dma_addr_t dma_addr, size_t size)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003891{
3892 int rc;
3893
3894 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3895 VM_DONTCOPY | VM_NORESERVE;
3896
Hillf Danton0db57532020-08-23 07:32:42 +08003897 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003898 if (rc)
Hillf Danton0db57532020-08-23 07:32:42 +08003899 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003900
3901 return rc;
3902}
3903
3904static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3905{
3906 struct gaudi_device *gaudi = hdev->asic_specific;
3907 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3908 int dma_id;
3909 bool invalid_queue = false;
3910
3911 switch (hw_queue_id) {
3912 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3913 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3914 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3915 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3916 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3917 break;
3918
3919 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3920 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3921 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3922 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3923 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3924 break;
3925
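	/* From DMA 2 onwards the queue IDs are shifted by one relative to
	 * the 4-stream grouping (the CPU_PQ ID sits in between), hence the
	 * -1 when extracting the stream index
	 */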
3926 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3927 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3928 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3929 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3930 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3931 break;
3932
3933 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3934 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3935 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3936 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3937 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3938 break;
3939
3940 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3941 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3942 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3943 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3944 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3945 break;
3946
3947 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003948 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3949 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3950 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3951 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3952 break;
3953
Ofir Bitton0940cab2020-08-31 08:52:56 +03003954 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003955 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3956 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3957 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3958 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3959 break;
3960
Ofir Bitton0940cab2020-08-31 08:52:56 +03003961 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3962 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
3963 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3964 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3965 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3966 break;
3967
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003968 case GAUDI_QUEUE_ID_CPU_PQ:
3969 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3970 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3971 else
3972 invalid_queue = true;
3973 break;
3974
3975 case GAUDI_QUEUE_ID_MME_0_0:
3976 db_reg_offset = mmMME2_QM_PQ_PI_0;
3977 break;
3978
3979 case GAUDI_QUEUE_ID_MME_0_1:
3980 db_reg_offset = mmMME2_QM_PQ_PI_1;
3981 break;
3982
3983 case GAUDI_QUEUE_ID_MME_0_2:
3984 db_reg_offset = mmMME2_QM_PQ_PI_2;
3985 break;
3986
3987 case GAUDI_QUEUE_ID_MME_0_3:
3988 db_reg_offset = mmMME2_QM_PQ_PI_3;
3989 break;
3990
3991 case GAUDI_QUEUE_ID_MME_1_0:
3992 db_reg_offset = mmMME0_QM_PQ_PI_0;
3993 break;
3994
3995 case GAUDI_QUEUE_ID_MME_1_1:
3996 db_reg_offset = mmMME0_QM_PQ_PI_1;
3997 break;
3998
3999 case GAUDI_QUEUE_ID_MME_1_2:
4000 db_reg_offset = mmMME0_QM_PQ_PI_2;
4001 break;
4002
4003 case GAUDI_QUEUE_ID_MME_1_3:
4004 db_reg_offset = mmMME0_QM_PQ_PI_3;
4005 break;
4006
4007 case GAUDI_QUEUE_ID_TPC_0_0:
4008 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4009 break;
4010
4011 case GAUDI_QUEUE_ID_TPC_0_1:
4012 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4013 break;
4014
4015 case GAUDI_QUEUE_ID_TPC_0_2:
4016 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4017 break;
4018
4019 case GAUDI_QUEUE_ID_TPC_0_3:
4020 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4021 break;
4022
4023 case GAUDI_QUEUE_ID_TPC_1_0:
4024 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4025 break;
4026
4027 case GAUDI_QUEUE_ID_TPC_1_1:
4028 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4029 break;
4030
4031 case GAUDI_QUEUE_ID_TPC_1_2:
4032 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4033 break;
4034
4035 case GAUDI_QUEUE_ID_TPC_1_3:
4036 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4037 break;
4038
4039 case GAUDI_QUEUE_ID_TPC_2_0:
4040 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4041 break;
4042
4043 case GAUDI_QUEUE_ID_TPC_2_1:
4044 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4045 break;
4046
4047 case GAUDI_QUEUE_ID_TPC_2_2:
4048 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4049 break;
4050
4051 case GAUDI_QUEUE_ID_TPC_2_3:
4052 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4053 break;
4054
4055 case GAUDI_QUEUE_ID_TPC_3_0:
4056 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4057 break;
4058
4059 case GAUDI_QUEUE_ID_TPC_3_1:
4060 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4061 break;
4062
4063 case GAUDI_QUEUE_ID_TPC_3_2:
4064 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4065 break;
4066
4067 case GAUDI_QUEUE_ID_TPC_3_3:
4068 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4069 break;
4070
4071 case GAUDI_QUEUE_ID_TPC_4_0:
4072 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4073 break;
4074
4075 case GAUDI_QUEUE_ID_TPC_4_1:
4076 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4077 break;
4078
4079 case GAUDI_QUEUE_ID_TPC_4_2:
4080 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4081 break;
4082
4083 case GAUDI_QUEUE_ID_TPC_4_3:
4084 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4085 break;
4086
4087 case GAUDI_QUEUE_ID_TPC_5_0:
4088 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4089 break;
4090
4091 case GAUDI_QUEUE_ID_TPC_5_1:
4092 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4093 break;
4094
4095 case GAUDI_QUEUE_ID_TPC_5_2:
4096 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4097 break;
4098
4099 case GAUDI_QUEUE_ID_TPC_5_3:
4100 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4101 break;
4102
4103 case GAUDI_QUEUE_ID_TPC_6_0:
4104 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4105 break;
4106
4107 case GAUDI_QUEUE_ID_TPC_6_1:
4108 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4109 break;
4110
4111 case GAUDI_QUEUE_ID_TPC_6_2:
4112 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4113 break;
4114
4115 case GAUDI_QUEUE_ID_TPC_6_3:
4116 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4117 break;
4118
4119 case GAUDI_QUEUE_ID_TPC_7_0:
4120 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4121 break;
4122
4123 case GAUDI_QUEUE_ID_TPC_7_1:
4124 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4125 break;
4126
4127 case GAUDI_QUEUE_ID_TPC_7_2:
4128 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4129 break;
4130
4131 case GAUDI_QUEUE_ID_TPC_7_3:
4132 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4133 break;
4134
Oded Gabbay3c681572020-11-02 21:10:39 +02004135 case GAUDI_QUEUE_ID_NIC_0_0:
4136 db_reg_offset = mmNIC0_QM0_PQ_PI_0;
4137 break;
4138
4139 case GAUDI_QUEUE_ID_NIC_0_1:
4140 db_reg_offset = mmNIC0_QM0_PQ_PI_1;
4141 break;
4142
4143 case GAUDI_QUEUE_ID_NIC_0_2:
4144 db_reg_offset = mmNIC0_QM0_PQ_PI_2;
4145 break;
4146
4147 case GAUDI_QUEUE_ID_NIC_0_3:
4148 db_reg_offset = mmNIC0_QM0_PQ_PI_3;
4149 break;
4150
4151 case GAUDI_QUEUE_ID_NIC_1_0:
4152 db_reg_offset = mmNIC0_QM1_PQ_PI_0;
4153 break;
4154
4155 case GAUDI_QUEUE_ID_NIC_1_1:
4156 db_reg_offset = mmNIC0_QM1_PQ_PI_1;
4157 break;
4158
4159 case GAUDI_QUEUE_ID_NIC_1_2:
4160 db_reg_offset = mmNIC0_QM1_PQ_PI_2;
4161 break;
4162
4163 case GAUDI_QUEUE_ID_NIC_1_3:
4164 db_reg_offset = mmNIC0_QM1_PQ_PI_3;
4165 break;
4166
4167 case GAUDI_QUEUE_ID_NIC_2_0:
4168 db_reg_offset = mmNIC1_QM0_PQ_PI_0;
4169 break;
4170
4171 case GAUDI_QUEUE_ID_NIC_2_1:
4172 db_reg_offset = mmNIC1_QM0_PQ_PI_1;
4173 break;
4174
4175 case GAUDI_QUEUE_ID_NIC_2_2:
4176 db_reg_offset = mmNIC1_QM0_PQ_PI_2;
4177 break;
4178
4179 case GAUDI_QUEUE_ID_NIC_2_3:
4180 db_reg_offset = mmNIC1_QM0_PQ_PI_3;
4181 break;
4182
4183 case GAUDI_QUEUE_ID_NIC_3_0:
4184 db_reg_offset = mmNIC1_QM1_PQ_PI_0;
4185 break;
4186
4187 case GAUDI_QUEUE_ID_NIC_3_1:
4188 db_reg_offset = mmNIC1_QM1_PQ_PI_1;
4189 break;
4190
4191 case GAUDI_QUEUE_ID_NIC_3_2:
4192 db_reg_offset = mmNIC1_QM1_PQ_PI_2;
4193 break;
4194
4195 case GAUDI_QUEUE_ID_NIC_3_3:
4196 db_reg_offset = mmNIC1_QM1_PQ_PI_3;
4197 break;
4198
4199 case GAUDI_QUEUE_ID_NIC_4_0:
4200 db_reg_offset = mmNIC2_QM0_PQ_PI_0;
4201 break;
4202
4203 case GAUDI_QUEUE_ID_NIC_4_1:
4204 db_reg_offset = mmNIC2_QM0_PQ_PI_1;
4205 break;
4206
4207 case GAUDI_QUEUE_ID_NIC_4_2:
4208 db_reg_offset = mmNIC2_QM0_PQ_PI_2;
4209 break;
4210
4211 case GAUDI_QUEUE_ID_NIC_4_3:
4212 db_reg_offset = mmNIC2_QM0_PQ_PI_3;
4213 break;
4214
4215 case GAUDI_QUEUE_ID_NIC_5_0:
4216 db_reg_offset = mmNIC2_QM1_PQ_PI_0;
4217 break;
4218
4219 case GAUDI_QUEUE_ID_NIC_5_1:
4220 db_reg_offset = mmNIC2_QM1_PQ_PI_1;
4221 break;
4222
4223 case GAUDI_QUEUE_ID_NIC_5_2:
4224 db_reg_offset = mmNIC2_QM1_PQ_PI_2;
4225 break;
4226
4227 case GAUDI_QUEUE_ID_NIC_5_3:
4228 db_reg_offset = mmNIC2_QM1_PQ_PI_3;
4229 break;
4230
4231 case GAUDI_QUEUE_ID_NIC_6_0:
4232 db_reg_offset = mmNIC3_QM0_PQ_PI_0;
4233 break;
4234
4235 case GAUDI_QUEUE_ID_NIC_6_1:
4236 db_reg_offset = mmNIC3_QM0_PQ_PI_1;
4237 break;
4238
4239 case GAUDI_QUEUE_ID_NIC_6_2:
4240 db_reg_offset = mmNIC3_QM0_PQ_PI_2;
4241 break;
4242
4243 case GAUDI_QUEUE_ID_NIC_6_3:
4244 db_reg_offset = mmNIC3_QM0_PQ_PI_3;
4245 break;
4246
4247 case GAUDI_QUEUE_ID_NIC_7_0:
4248 db_reg_offset = mmNIC3_QM1_PQ_PI_0;
4249 break;
4250
4251 case GAUDI_QUEUE_ID_NIC_7_1:
4252 db_reg_offset = mmNIC3_QM1_PQ_PI_1;
4253 break;
4254
4255 case GAUDI_QUEUE_ID_NIC_7_2:
4256 db_reg_offset = mmNIC3_QM1_PQ_PI_2;
4257 break;
4258
4259 case GAUDI_QUEUE_ID_NIC_7_3:
4260 db_reg_offset = mmNIC3_QM1_PQ_PI_3;
4261 break;
4262
4263 case GAUDI_QUEUE_ID_NIC_8_0:
4264 db_reg_offset = mmNIC4_QM0_PQ_PI_0;
4265 break;
4266
4267 case GAUDI_QUEUE_ID_NIC_8_1:
4268 db_reg_offset = mmNIC4_QM0_PQ_PI_1;
4269 break;
4270
4271 case GAUDI_QUEUE_ID_NIC_8_2:
4272 db_reg_offset = mmNIC4_QM0_PQ_PI_2;
4273 break;
4274
4275 case GAUDI_QUEUE_ID_NIC_8_3:
4276 db_reg_offset = mmNIC4_QM0_PQ_PI_3;
4277 break;
4278
4279 case GAUDI_QUEUE_ID_NIC_9_0:
4280 db_reg_offset = mmNIC4_QM1_PQ_PI_0;
4281 break;
4282
4283 case GAUDI_QUEUE_ID_NIC_9_1:
4284 db_reg_offset = mmNIC4_QM1_PQ_PI_1;
4285 break;
4286
4287 case GAUDI_QUEUE_ID_NIC_9_2:
4288 db_reg_offset = mmNIC4_QM1_PQ_PI_2;
4289 break;
4290
4291 case GAUDI_QUEUE_ID_NIC_9_3:
4292 db_reg_offset = mmNIC4_QM1_PQ_PI_3;
4293 break;
4294
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004295 default:
4296 invalid_queue = true;
4297 }
4298
4299 if (invalid_queue) {
4300 /* Should never get here */
4301 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4302 hw_queue_id);
4303 return;
4304 }
4305
4306 db_value = pi;
4307
4308 /* ring the doorbell */
4309 WREG32(db_reg_offset, db_value);
4310
4311 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
4312 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
4313 GAUDI_EVENT_PI_UPDATE);
4314}
4315
4316static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4317 struct hl_bd *bd)
4318{
4319 __le64 *pbd = (__le64 *) bd;
4320
4321	/* The QMANs are in host memory so a simple copy suffices */
4322 pqe[0] = pbd[0];
4323 pqe[1] = pbd[1];
4324}
4325
4326static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4327 dma_addr_t *dma_handle, gfp_t flags)
4328{
4329 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4330 dma_handle, flags);
4331
4332 /* Shift to the device's base physical address of host memory */
4333 if (kernel_addr)
4334 *dma_handle += HOST_PHYS_BASE;
4335
4336 return kernel_addr;
4337}
4338
4339static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4340 void *cpu_addr, dma_addr_t dma_handle)
4341{
4342 /* Cancel the device's base physical address of host memory */
4343 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4344
4345 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4346}
4347
4348static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4349 u32 queue_id, dma_addr_t *dma_handle,
4350 u16 *queue_len)
4351{
4352 struct gaudi_device *gaudi = hdev->asic_specific;
4353 struct gaudi_internal_qman_info *q;
4354
4355 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4356 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4357 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4358 return NULL;
4359 }
4360
4361 q = &gaudi->internal_qmans[queue_id];
4362 *dma_handle = q->pq_dma_addr;
4363 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4364
4365 return q->pq_kernel_addr;
4366}
4367
4368static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4369 u16 len, u32 timeout, long *result)
4370{
4371 struct gaudi_device *gaudi = hdev->asic_specific;
4372
4373 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4374 if (result)
4375 *result = 0;
4376 return 0;
4377 }
4378
Oded Gabbay788cacf2020-07-07 17:30:13 +03004379 if (!timeout)
4380 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4381
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004382 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4383 timeout, result);
4384}
4385
4386static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4387{
4388 struct packet_msg_prot *fence_pkt;
4389 dma_addr_t pkt_dma_addr;
4390 u32 fence_val, tmp, timeout_usec;
4391 dma_addr_t fence_dma_addr;
4392 u32 *fence_ptr;
4393 int rc;
4394
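	/* The test sends a MSG_PROT packet through the queue that writes a
	 * known fence value to host memory, then polls that location to
	 * verify the queue processed the packet
	 */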
4395 if (hdev->pldm)
4396 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4397 else
4398 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4399
4400 fence_val = GAUDI_QMAN0_FENCE_VAL;
4401
4402 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4403 &fence_dma_addr);
4404 if (!fence_ptr) {
4405 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004406 "Failed to allocate memory for H/W queue %d testing\n",
4407 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004408 return -ENOMEM;
4409 }
4410
4411 *fence_ptr = 0;
4412
4413 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4414 sizeof(struct packet_msg_prot),
4415 GFP_KERNEL, &pkt_dma_addr);
4416 if (!fence_pkt) {
4417 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004418 "Failed to allocate packet for H/W queue %d testing\n",
4419 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004420 rc = -ENOMEM;
4421 goto free_fence_ptr;
4422 }
4423
Oded Gabbay65887292020-08-12 11:21:01 +03004424 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4425 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4426 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4427
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004428 fence_pkt->ctl = cpu_to_le32(tmp);
4429 fence_pkt->value = cpu_to_le32(fence_val);
4430 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4431
4432 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4433 sizeof(struct packet_msg_prot),
4434 pkt_dma_addr);
4435 if (rc) {
4436 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03004437 "Failed to send fence packet to H/W queue %d\n",
4438 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004439 goto free_pkt;
4440 }
4441
4442 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4443 1000, timeout_usec, true);
4444
4445 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4446
4447 if (rc == -ETIMEDOUT) {
4448 dev_err(hdev->dev,
4449 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4450 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4451 rc = -EIO;
4452 }
4453
4454free_pkt:
4455 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4456 pkt_dma_addr);
4457free_fence_ptr:
4458 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4459 fence_dma_addr);
4460 return rc;
4461}
4462
4463static int gaudi_test_cpu_queue(struct hl_device *hdev)
4464{
4465 struct gaudi_device *gaudi = hdev->asic_specific;
4466
4467 /*
4468	 * Check the capability here because send_cpu_message() won't update
4469	 * the result value if the capability is not set
4470 */
4471 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4472 return 0;
4473
4474 return hl_fw_test_cpu_queue(hdev);
4475}
4476
4477static int gaudi_test_queues(struct hl_device *hdev)
4478{
4479 int i, rc, ret_val = 0;
4480
Ofir Bitton3abc99b2020-06-23 14:50:39 +03004481 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004482 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4483 rc = gaudi_test_queue(hdev, i);
4484 if (rc)
4485 ret_val = -EINVAL;
4486 }
4487 }
4488
4489 rc = gaudi_test_cpu_queue(hdev);
4490 if (rc)
4491 ret_val = -EINVAL;
4492
4493 return ret_val;
4494}
4495
4496static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4497 gfp_t mem_flags, dma_addr_t *dma_handle)
4498{
4499 void *kernel_addr;
4500
4501 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4502 return NULL;
4503
4504 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4505
4506 /* Shift to the device's base physical address of host memory */
4507 if (kernel_addr)
4508 *dma_handle += HOST_PHYS_BASE;
4509
4510 return kernel_addr;
4511}
4512
4513static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4514 dma_addr_t dma_addr)
4515{
4516 /* Cancel the device's base physical address of host memory */
4517 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4518
4519 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4520}
4521
4522static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4523 size_t size, dma_addr_t *dma_handle)
4524{
4525 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4526}
4527
4528static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4529 size_t size, void *vaddr)
4530{
4531 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4532}
4533
4534static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4535 int nents, enum dma_data_direction dir)
4536{
4537 struct scatterlist *sg;
4538 int i;
4539
4540 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4541 return -ENOMEM;
4542
4543 /* Shift to the device's base physical address of host memory */
4544 for_each_sg(sgl, sg, nents, i)
4545 sg->dma_address += HOST_PHYS_BASE;
4546
4547 return 0;
4548}
4549
4550static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4551 int nents, enum dma_data_direction dir)
4552{
4553 struct scatterlist *sg;
4554 int i;
4555
4556 /* Cancel the device's base physical address of host memory */
4557 for_each_sg(sgl, sg, nents, i)
4558 sg->dma_address -= HOST_PHYS_BASE;
4559
4560 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4561}
4562
4563static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4564 struct sg_table *sgt)
4565{
4566 struct scatterlist *sg, *sg_next_iter;
4567 u32 count, dma_desc_cnt;
4568 u64 len, len_next;
4569 dma_addr_t addr, addr_next;
4570
4571 dma_desc_cnt = 0;
4572
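	/* Count how many LIN_DMA packets are needed: physically contiguous
	 * SG entries are merged, as long as the merged size doesn't exceed
	 * DMA_MAX_TRANSFER_SIZE
	 */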
4573 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4574
4575 len = sg_dma_len(sg);
4576 addr = sg_dma_address(sg);
4577
4578 if (len == 0)
4579 break;
4580
4581 while ((count + 1) < sgt->nents) {
4582 sg_next_iter = sg_next(sg);
4583 len_next = sg_dma_len(sg_next_iter);
4584 addr_next = sg_dma_address(sg_next_iter);
4585
4586 if (len_next == 0)
4587 break;
4588
4589 if ((addr + len == addr_next) &&
4590 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4591 len += len_next;
4592 count++;
4593 sg = sg_next_iter;
4594 } else {
4595 break;
4596 }
4597 }
4598
4599 dma_desc_cnt++;
4600 }
4601
4602 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4603}
4604
4605static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4606 struct hl_cs_parser *parser,
4607 struct packet_lin_dma *user_dma_pkt,
4608 u64 addr, enum dma_data_direction dir)
4609{
4610 struct hl_userptr *userptr;
4611 int rc;
4612
4613 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4614 parser->job_userptr_list, &userptr))
4615 goto already_pinned;
4616
4617 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
4618 if (!userptr)
4619 return -ENOMEM;
4620
4621 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4622 userptr);
4623 if (rc)
4624 goto free_userptr;
4625
4626 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4627
4628 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
4629 userptr->sgt->nents, dir);
4630 if (rc) {
4631 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4632 goto unpin_memory;
4633 }
4634
4635 userptr->dma_mapped = true;
4636 userptr->dir = dir;
4637
4638already_pinned:
4639 parser->patched_cb_size +=
4640 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4641
4642 return 0;
4643
4644unpin_memory:
4645 hl_unpin_host_memory(hdev, userptr);
4646free_userptr:
4647 kfree(userptr);
4648 return rc;
4649}
4650
4651static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4652 struct hl_cs_parser *parser,
4653 struct packet_lin_dma *user_dma_pkt,
4654 bool src_in_host)
4655{
4656 enum dma_data_direction dir;
4657 bool skip_host_mem_pin = false, user_memset;
4658 u64 addr;
4659 int rc = 0;
4660
4661 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4662 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4663 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4664
4665 if (src_in_host) {
4666 if (user_memset)
4667 skip_host_mem_pin = true;
4668
4669 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4670 dir = DMA_TO_DEVICE;
4671 addr = le64_to_cpu(user_dma_pkt->src_addr);
4672 } else {
4673 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4674 dir = DMA_FROM_DEVICE;
4675 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4676 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4677 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4678 }
4679
4680 if (skip_host_mem_pin)
4681 parser->patched_cb_size += sizeof(*user_dma_pkt);
4682 else
4683 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4684 addr, dir);
4685
4686 return rc;
4687}
4688
4689static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4690 struct hl_cs_parser *parser,
4691 struct packet_lin_dma *user_dma_pkt)
4692{
4693 bool src_in_host = false;
4694 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4695 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4696 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4697
4698 dev_dbg(hdev->dev, "DMA packet details:\n");
4699 dev_dbg(hdev->dev, "source == 0x%llx\n",
4700 le64_to_cpu(user_dma_pkt->src_addr));
4701 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4702 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4703
4704 /*
4705 * Special handling for DMA with size 0. Bypass all validations
4706 * because no transactions will be done except for WR_COMP, which
4707 * is not a security issue
4708 */
4709 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4710 parser->patched_cb_size += sizeof(*user_dma_pkt);
4711 return 0;
4712 }
4713
4714 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4715 src_in_host = true;
4716
4717 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4718 src_in_host);
4719}
4720
Oded Gabbay64536ab2020-05-27 12:38:16 +03004721static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
4722 struct hl_cs_parser *parser,
4723 struct packet_load_and_exe *user_pkt)
4724{
4725 u32 cfg;
4726
4727 cfg = le32_to_cpu(user_pkt->cfg);
4728
4729 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
4730 dev_err(hdev->dev,
4731 "User not allowed to use Load and Execute\n");
4732 return -EPERM;
4733 }
4734
4735 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
4736
4737 return 0;
4738}
4739
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004740static int gaudi_validate_cb(struct hl_device *hdev,
4741 struct hl_cs_parser *parser, bool is_mmu)
4742{
4743 u32 cb_parsed_length = 0;
4744 int rc = 0;
4745
4746 parser->patched_cb_size = 0;
4747
4748	/* cb_user_size is larger than 0 so the loop will always be executed */
4749 while (cb_parsed_length < parser->user_cb_size) {
4750 enum packet_id pkt_id;
4751 u16 pkt_size;
4752 struct gaudi_packet *user_pkt;
4753
Arnd Bergmann82948e62020-10-26 17:08:06 +01004754 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004755
4756 pkt_id = (enum packet_id) (
4757 (le64_to_cpu(user_pkt->header) &
4758 PACKET_HEADER_PACKET_ID_MASK) >>
4759 PACKET_HEADER_PACKET_ID_SHIFT);
4760
Ofir Bittonbc75be22020-07-30 14:56:38 +03004761 if (!validate_packet_id(pkt_id)) {
4762 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4763 rc = -EINVAL;
4764 break;
4765 }
4766
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004767 pkt_size = gaudi_packet_sizes[pkt_id];
4768 cb_parsed_length += pkt_size;
4769 if (cb_parsed_length > parser->user_cb_size) {
4770 dev_err(hdev->dev,
4771 "packet 0x%x is out of CB boundary\n", pkt_id);
4772 rc = -EINVAL;
4773 break;
4774 }
4775
4776 switch (pkt_id) {
4777 case PACKET_MSG_PROT:
4778 dev_err(hdev->dev,
4779 "User not allowed to use MSG_PROT\n");
4780 rc = -EPERM;
4781 break;
4782
4783 case PACKET_CP_DMA:
4784 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4785 rc = -EPERM;
4786 break;
4787
4788 case PACKET_STOP:
4789 dev_err(hdev->dev, "User not allowed to use STOP\n");
4790 rc = -EPERM;
4791 break;
4792
Oded Gabbay2edc66e2020-07-03 19:28:54 +03004793 case PACKET_WREG_BULK:
4794 dev_err(hdev->dev,
4795 "User not allowed to use WREG_BULK\n");
4796 rc = -EPERM;
4797 break;
4798
Oded Gabbay64536ab2020-05-27 12:38:16 +03004799 case PACKET_LOAD_AND_EXE:
4800 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
4801 (struct packet_load_and_exe *) user_pkt);
4802 break;
4803
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004804 case PACKET_LIN_DMA:
4805 parser->contains_dma_pkt = true;
4806 if (is_mmu)
4807 parser->patched_cb_size += pkt_size;
4808 else
4809 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
4810 (struct packet_lin_dma *) user_pkt);
4811 break;
4812
4813 case PACKET_WREG_32:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004814 case PACKET_MSG_LONG:
4815 case PACKET_MSG_SHORT:
4816 case PACKET_REPEAT:
4817 case PACKET_FENCE:
4818 case PACKET_NOP:
4819 case PACKET_ARB_POINT:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004820 parser->patched_cb_size += pkt_size;
4821 break;
4822
4823 default:
4824 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4825 pkt_id);
4826 rc = -EINVAL;
4827 break;
4828 }
4829
4830 if (rc)
4831 break;
4832 }
4833
4834 /*
4835 * The new CB should have space at the end for two MSG_PROT packets:
4836 * 1. A packet that will act as a completion packet
4837 * 2. A packet that will generate MSI-X interrupt
4838 */
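	/*
	 * Illustrative layout of the resulting patched CB (sketch only; the
	 * two trailing packets are written by gaudi_add_end_of_cb_packets()):
	 *
	 *   +---------------------------------------------+
	 *   | validated / patched user packets            |
	 *   +---------------------------------------------+
	 *   | MSG_PROT: write cq_val to cq_addr           |
	 *   +---------------------------------------------+
	 *   | MSG_PROT: write to PCIE_MSI_INTR_x (MSI)    |
	 *   +---------------------------------------------+
	 */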
4839 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
4840
4841 return rc;
4842}
4843
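/*
 * gaudi_patch_dma_packet() - expand a user LIN_DMA packet (rough outline).
 *
 * A user LIN_DMA that targets pinned host memory is rewritten as one LIN_DMA
 * packet per (merged) scatter-gather entry: physically contiguous entries are
 * coalesced up to DMA_MAX_TRANSFER_SIZE, write-completion (WR_COMP) is
 * suppressed on every generated packet except the last, where the user's
 * original setting is restored, and the user's engine-barrier (EB) setting is
 * preserved only on the first packet. Host-to-device memset packets are
 * copied through unchanged.
 */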
4844static int gaudi_patch_dma_packet(struct hl_device *hdev,
4845 struct hl_cs_parser *parser,
4846 struct packet_lin_dma *user_dma_pkt,
4847 struct packet_lin_dma *new_dma_pkt,
4848 u32 *new_dma_pkt_size)
4849{
4850 struct hl_userptr *userptr;
4851 struct scatterlist *sg, *sg_next_iter;
4852 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
4853 u64 len, len_next;
4854 dma_addr_t dma_addr, dma_addr_next;
4855 u64 device_memory_addr, addr;
4856 enum dma_data_direction dir;
4857 struct sg_table *sgt;
4858 bool src_in_host = false;
4859 bool skip_host_mem_pin = false;
4860 bool user_memset;
4861
4862 ctl = le32_to_cpu(user_dma_pkt->ctl);
4863
4864 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4865 src_in_host = true;
4866
4867 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4868 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4869
4870 if (src_in_host) {
4871 addr = le64_to_cpu(user_dma_pkt->src_addr);
4872 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
4873 dir = DMA_TO_DEVICE;
4874 if (user_memset)
4875 skip_host_mem_pin = true;
4876 } else {
4877 addr = le64_to_cpu(user_dma_pkt->dst_addr);
4878 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
4879 dir = DMA_FROM_DEVICE;
4880 }
4881
4882 if ((!skip_host_mem_pin) &&
4883 (!hl_userptr_is_pinned(hdev, addr,
4884 le32_to_cpu(user_dma_pkt->tsize),
4885 parser->job_userptr_list, &userptr))) {
4886 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
4887 addr, le32_to_cpu(user_dma_pkt->tsize));
4888 return -EFAULT;
4889 }
4890
4891 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
4892 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
4893 *new_dma_pkt_size = sizeof(*user_dma_pkt);
4894 return 0;
4895 }
4896
4897 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
4898
4899 sgt = userptr->sgt;
4900 dma_desc_cnt = 0;
4901
4902 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4903 len = sg_dma_len(sg);
4904 dma_addr = sg_dma_address(sg);
4905
4906 if (len == 0)
4907 break;
4908
4909 while ((count + 1) < sgt->nents) {
4910 sg_next_iter = sg_next(sg);
4911 len_next = sg_dma_len(sg_next_iter);
4912 dma_addr_next = sg_dma_address(sg_next_iter);
4913
4914 if (len_next == 0)
4915 break;
4916
4917 if ((dma_addr + len == dma_addr_next) &&
4918 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4919 len += len_next;
4920 count++;
4921 sg = sg_next_iter;
4922 } else {
4923 break;
4924 }
4925 }
4926
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004927 ctl = le32_to_cpu(user_dma_pkt->ctl);
4928 if (likely(dma_desc_cnt))
4929 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
4930 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
4931 new_dma_pkt->ctl = cpu_to_le32(ctl);
4932 new_dma_pkt->tsize = cpu_to_le32(len);
4933
4934 if (dir == DMA_TO_DEVICE) {
4935 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
4936 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
4937 } else {
4938 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
4939 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
4940 }
4941
4942 if (!user_memset)
4943 device_memory_addr += len;
4944 dma_desc_cnt++;
4945 new_dma_pkt++;
4946 }
4947
4948 if (!dma_desc_cnt) {
4949 dev_err(hdev->dev,
4950 "Error of 0 SG entries when patching DMA packet\n");
4951 return -EFAULT;
4952 }
4953
4954 /* Fix the last dma packet - wrcomp must be as user set it */
4955 new_dma_pkt--;
4956 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4957
4958 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4959
4960 return 0;
4961}
4962
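/*
 * Second pass over the user CB (outline): LIN_DMA packets are expanded via
 * gaudi_patch_dma_packet(), while every other packet type that survived
 * gaudi_validate_cb() is copied verbatim into the patched CB.
 */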
4963static int gaudi_patch_cb(struct hl_device *hdev,
4964 struct hl_cs_parser *parser)
4965{
4966 u32 cb_parsed_length = 0;
4967 u32 cb_patched_cur_length = 0;
4968 int rc = 0;
4969
4970 /* parser->user_cb_size is greater than 0, so the loop always executes */
4971 while (cb_parsed_length < parser->user_cb_size) {
4972 enum packet_id pkt_id;
4973 u16 pkt_size;
4974 u32 new_pkt_size = 0;
4975 struct gaudi_packet *user_pkt, *kernel_pkt;
4976
Arnd Bergmann82948e62020-10-26 17:08:06 +01004977 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4978 kernel_pkt = parser->patched_cb->kernel_address +
4979 cb_patched_cur_length;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004980
4981 pkt_id = (enum packet_id) (
4982 (le64_to_cpu(user_pkt->header) &
4983 PACKET_HEADER_PACKET_ID_MASK) >>
4984 PACKET_HEADER_PACKET_ID_SHIFT);
4985
Ofir Bittonbc75be22020-07-30 14:56:38 +03004986 if (!validate_packet_id(pkt_id)) {
4987 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4988 rc = -EINVAL;
4989 break;
4990 }
4991
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004992 pkt_size = gaudi_packet_sizes[pkt_id];
4993 cb_parsed_length += pkt_size;
4994 if (cb_parsed_length > parser->user_cb_size) {
4995 dev_err(hdev->dev,
4996 "packet 0x%x is out of CB boundary\n", pkt_id);
4997 rc = -EINVAL;
4998 break;
4999 }
5000
5001 switch (pkt_id) {
5002 case PACKET_LIN_DMA:
5003 rc = gaudi_patch_dma_packet(hdev, parser,
5004 (struct packet_lin_dma *) user_pkt,
5005 (struct packet_lin_dma *) kernel_pkt,
5006 &new_pkt_size);
5007 cb_patched_cur_length += new_pkt_size;
5008 break;
5009
5010 case PACKET_MSG_PROT:
5011 dev_err(hdev->dev,
5012 "User not allowed to use MSG_PROT\n");
5013 rc = -EPERM;
5014 break;
5015
5016 case PACKET_CP_DMA:
5017 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5018 rc = -EPERM;
5019 break;
5020
5021 case PACKET_STOP:
5022 dev_err(hdev->dev, "User not allowed to use STOP\n");
5023 rc = -EPERM;
5024 break;
5025
5026 case PACKET_WREG_32:
5027 case PACKET_WREG_BULK:
5028 case PACKET_MSG_LONG:
5029 case PACKET_MSG_SHORT:
5030 case PACKET_REPEAT:
5031 case PACKET_FENCE:
5032 case PACKET_NOP:
5033 case PACKET_ARB_POINT:
5034 case PACKET_LOAD_AND_EXE:
5035 memcpy(kernel_pkt, user_pkt, pkt_size);
5036 cb_patched_cur_length += pkt_size;
5037 break;
5038
5039 default:
5040 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5041 pkt_id);
5042 rc = -EINVAL;
5043 break;
5044 }
5045
5046 if (rc)
5047 break;
5048 }
5049
5050 return rc;
5051}
5052
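/*
 * MMU path (outline): the user CB is copied as-is into a kernel-owned patched
 * CB and only re-validated; LIN_DMA packets need no expansion here because
 * host addresses are translated by the device MMU. Room for the two trailing
 * MSG_PROT packets is reserved up front.
 */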
5053static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5054 struct hl_cs_parser *parser)
5055{
5056 u64 patched_cb_handle;
5057 u32 patched_cb_size;
5058 struct hl_cb *user_cb;
5059 int rc;
5060
5061 /*
5062 * The new CB should have space at the end for two MSG_PROT packets:
5063 * 1. A packet that will act as a completion packet
5064 * 2. A packet that will generate an MSI interrupt
5065 */
5066 parser->patched_cb_size = parser->user_cb_size +
5067 sizeof(struct packet_msg_prot) * 2;
5068
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005069 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005070 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005071 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005072
5073 if (rc) {
5074 dev_err(hdev->dev,
5075 "Failed to allocate patched CB for DMA CS %d\n",
5076 rc);
5077 return rc;
5078 }
5079
5080 patched_cb_handle >>= PAGE_SHIFT;
5081 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5082 (u32) patched_cb_handle);
5083 /* hl_cb_get should never fail here so use kernel WARN */
5084 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5085 (u32) patched_cb_handle);
5086 if (!parser->patched_cb) {
5087 rc = -EFAULT;
5088 goto out;
5089 }
5090
5091 /*
5092 * The check that parser->user_cb_size <= parser->user_cb->size was done
5093 * in validate_queue_index().
5094 */
Arnd Bergmann82948e62020-10-26 17:08:06 +01005095 memcpy(parser->patched_cb->kernel_address,
5096 parser->user_cb->kernel_address,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005097 parser->user_cb_size);
5098
5099 patched_cb_size = parser->patched_cb_size;
5100
5101 /* Validate patched CB instead of user CB */
5102 user_cb = parser->user_cb;
5103 parser->user_cb = parser->patched_cb;
5104 rc = gaudi_validate_cb(hdev, parser, true);
5105 parser->user_cb = user_cb;
5106
5107 if (rc) {
5108 hl_cb_put(parser->patched_cb);
5109 goto out;
5110 }
5111
5112 if (patched_cb_size != parser->patched_cb_size) {
5113 dev_err(hdev->dev, "user CB size mismatch\n");
5114 hl_cb_put(parser->patched_cb);
5115 rc = -EINVAL;
5116 goto out;
5117 }
5118
5119out:
5120 /*
5121 * Always call cb destroy here because we still hold 1 reference
5122 * to it from the earlier cb_get. After the job is completed,
5123 * cb_put will release it, but here we want to remove it from the
5124 * IDR
5125 */
5126 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5127 patched_cb_handle << PAGE_SHIFT);
5128
5129 return rc;
5130}
5131
5132static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5133 struct hl_cs_parser *parser)
5134{
5135 u64 patched_cb_handle;
5136 int rc;
5137
5138 rc = gaudi_validate_cb(hdev, parser, false);
5139
5140 if (rc)
5141 goto free_userptr;
5142
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005143 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03005144 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03005145 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005146 if (rc) {
5147 dev_err(hdev->dev,
5148 "Failed to allocate patched CB for DMA CS %d\n", rc);
5149 goto free_userptr;
5150 }
5151
5152 patched_cb_handle >>= PAGE_SHIFT;
5153 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5154 (u32) patched_cb_handle);
5155 /* hl_cb_get should never fail here so use kernel WARN */
5156 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5157 (u32) patched_cb_handle);
5158 if (!parser->patched_cb) {
5159 rc = -EFAULT;
5160 goto out;
5161 }
5162
5163 rc = gaudi_patch_cb(hdev, parser);
5164
5165 if (rc)
5166 hl_cb_put(parser->patched_cb);
5167
5168out:
5169 /*
5170 * Always call cb destroy here because we still hold 1 reference
5171 * to it from the earlier cb_get. After the job is completed,
5172 * cb_put will release it, but here we want to remove it from the
5173 * IDR
5174 */
5175 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5176 patched_cb_handle << PAGE_SHIFT);
5177
5178free_userptr:
5179 if (rc)
5180 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5181 return rc;
5182}
5183
5184static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5185 struct hl_cs_parser *parser)
5186{
5187 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
Oded Gabbay3c681572020-11-02 21:10:39 +02005188 struct gaudi_device *gaudi = hdev->asic_specific;
5189 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5190 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5191
5192 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5193 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5194 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5195 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5196 parser->hw_queue_id);
5197 return -EINVAL;
5198 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005199
5200 /* For internal queue jobs just check if CB address is valid */
5201 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5202 parser->user_cb_size,
5203 asic_prop->sram_user_base_address,
5204 asic_prop->sram_end_address))
5205 return 0;
5206
5207 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5208 parser->user_cb_size,
5209 asic_prop->dram_user_base_address,
5210 asic_prop->dram_end_address))
5211 return 0;
5212
5213 /* PMMU and HPMMU addresses are equal, check only one of them */
5214 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5215 parser->user_cb_size,
5216 asic_prop->pmmu.start_addr,
5217 asic_prop->pmmu.end_addr))
5218 return 0;
5219
5220 dev_err(hdev->dev,
5221 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5222 parser->user_cb, parser->user_cb_size);
5223
5224 return -EFAULT;
5225}
5226
5227static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5228{
5229 struct gaudi_device *gaudi = hdev->asic_specific;
5230
5231 if (parser->queue_type == QUEUE_TYPE_INT)
5232 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5233
5234 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5235 return gaudi_parse_cb_mmu(hdev, parser);
5236 else
5237 return gaudi_parse_cb_no_mmu(hdev, parser);
5238}
5239
5240static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
Arnd Bergmann82948e62020-10-26 17:08:06 +01005241 void *kernel_address, u32 len,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005242 u64 cq_addr, u32 cq_val, u32 msi_vec,
5243 bool eb)
5244{
5245 struct gaudi_device *gaudi = hdev->asic_specific;
5246 struct packet_msg_prot *cq_pkt;
5247 u32 tmp;
5248
Arnd Bergmann82948e62020-10-26 17:08:06 +01005249 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005250
Oded Gabbay65887292020-08-12 11:21:01 +03005251 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5252 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005253
5254 if (eb)
Oded Gabbay65887292020-08-12 11:21:01 +03005255 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005256
5257 cq_pkt->ctl = cpu_to_le32(tmp);
5258 cq_pkt->value = cpu_to_le32(cq_val);
5259 cq_pkt->addr = cpu_to_le64(cq_addr);
5260
5261 cq_pkt++;
5262
Oded Gabbay65887292020-08-12 11:21:01 +03005263 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5264 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005265 cq_pkt->ctl = cpu_to_le32(tmp);
5266 cq_pkt->value = cpu_to_le32(1);
5267
5268 if (!gaudi->multi_msi_mode)
5269 msi_vec = 0;
5270
5271 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5272}
5273
5274static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5275{
5276 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5277}
5278
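/*
 * Fill a region of device memory with a single LIN_DMA packet submitted
 * through gaudi_send_job_on_qman0() (sketch of the mechanism): with the
 * MEMSET bit set in the packet control word, src_addr carries the 64-bit
 * fill pattern instead of a source address, which is why 'val' is written
 * into src_addr below.
 */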
5279static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5280 u32 size, u64 val)
5281{
5282 struct packet_lin_dma *lin_dma_pkt;
5283 struct hl_cs_job *job;
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005284 u32 cb_size, ctl, err_cause;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005285 struct hl_cb *cb;
5286 int rc;
5287
Ofir Bittona04b7cd2020-07-13 13:36:55 +03005288 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005289 if (!cb)
5290 return -EFAULT;
5291
Arnd Bergmann82948e62020-10-26 17:08:06 +01005292 lin_dma_pkt = cb->kernel_address;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005293 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5294 cb_size = sizeof(*lin_dma_pkt);
5295
Oded Gabbay65887292020-08-12 11:21:01 +03005296 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5297 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5298 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5299 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5300 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5301
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005302 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5303 lin_dma_pkt->src_addr = cpu_to_le64(val);
5304 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5305 lin_dma_pkt->tsize = cpu_to_le32(size);
5306
5307 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5308 if (!job) {
5309 dev_err(hdev->dev, "Failed to allocate a new job\n");
5310 rc = -ENOMEM;
5311 goto release_cb;
5312 }
5313
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005314 /* Verify DMA is OK */
5315 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5316 if (err_cause && !hdev->init_done) {
5317 dev_dbg(hdev->dev,
5318 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5319 err_cause);
5320 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5321 }
5322
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005323 job->id = 0;
5324 job->user_cb = cb;
5325 job->user_cb->cs_cnt++;
5326 job->user_cb_size = cb_size;
5327 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5328 job->patched_cb = job->user_cb;
5329 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5330
5331 hl_debugfs_add_job(hdev, job);
5332
5333 rc = gaudi_send_job_on_qman0(hdev, job);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005334 hl_debugfs_remove_job(hdev, job);
5335 kfree(job);
5336 cb->cs_cnt--;
5337
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03005338 /* Verify DMA is OK */
5339 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5340 if (err_cause) {
5341 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5342 rc = -EIO;
5343 if (!hdev->init_done) {
5344 dev_dbg(hdev->dev,
5345 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5346 err_cause);
5347 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5348 }
5349 }
5350
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005351release_cb:
5352 hl_cb_put(cb);
5353 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5354
5355 return rc;
5356}
5357
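/*
 * The sync-manager objects and monitor status registers are 32-bit registers
 * laid out 4 bytes apart, so the loops below stride by 4 over
 * NUM_OF_*_IN_BLOCK << 2 bytes. Only the west-south block is partially
 * cleared, starting from the first object/monitor available to the user.
 */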
5358static void gaudi_restore_sm_registers(struct hl_device *hdev)
5359{
5360 int i;
5361
5362 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
5363 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5364 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5365 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5366 }
5367
5368 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
5369 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5370 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5371 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5372 }
5373
5374 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
5375
5376 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
5377 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5378
5379 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
5380
5381 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
5382 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5383}
5384
5385static void gaudi_restore_dma_registers(struct hl_device *hdev)
5386{
5387 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5388 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5389 int i;
5390
5391 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5392 u64 sob_addr = CFG_BASE +
5393 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5394 (i * sob_delta);
5395 u32 dma_offset = i * DMA_CORE_OFFSET;
5396
5397 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5398 lower_32_bits(sob_addr));
5399 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5400 upper_32_bits(sob_addr));
5401 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5402
5403 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5404 * modified by the user for SRAM reduction
5405 */
5406 if (i > 1)
5407 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5408 0x00000001);
5409 }
5410}
5411
5412static void gaudi_restore_qm_registers(struct hl_device *hdev)
5413{
5414 u32 qman_offset;
5415 int i;
5416
5417 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5418 qman_offset = i * DMA_QMAN_OFFSET;
5419 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5420 }
5421
5422 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5423 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5424 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5425 }
5426
5427 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5428 qman_offset = i * TPC_QMAN_OFFSET;
5429 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5430 }
Oded Gabbay3c681572020-11-02 21:10:39 +02005431
5432 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5433 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5434 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5435 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5436 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005437}
5438
5439static void gaudi_restore_user_registers(struct hl_device *hdev)
5440{
5441 gaudi_restore_sm_registers(hdev);
5442 gaudi_restore_dma_registers(hdev);
5443 gaudi_restore_qm_registers(hdev);
5444}
5445
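/*
 * On context switch the user SRAM region is scrubbed with a fixed pattern
 * (presumably so data from the previous context cannot leak into the next
 * one) and the user-visible SM/DMA/QM registers are restored to their
 * defaults. On PLDM only a small window is cleared, presumably to keep
 * simulation time reasonable.
 */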
5446static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5447{
5448 struct asic_fixed_properties *prop = &hdev->asic_prop;
5449 u64 addr = prop->sram_user_base_address;
5450 u32 size = hdev->pldm ? 0x10000 :
5451 (prop->sram_size - SRAM_USER_BASE_OFFSET);
5452 u64 val = 0x7777777777777777ull;
5453 int rc;
5454
5455 rc = gaudi_memset_device_memory(hdev, addr, size, val);
5456 if (rc) {
5457 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
5458 return rc;
5459 }
5460
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005461 gaudi_restore_user_registers(hdev);
5462
5463 return 0;
5464}
5465
5466static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5467{
5468 struct asic_fixed_properties *prop = &hdev->asic_prop;
5469 struct gaudi_device *gaudi = hdev->asic_specific;
5470 u64 addr = prop->mmu_pgt_addr;
5471 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
5472
5473 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5474 return 0;
5475
5476 return gaudi_memset_device_memory(hdev, addr, size, 0);
5477}
5478
5479static void gaudi_restore_phase_topology(struct hl_device *hdev)
5480{
5481
5482}
5483
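/*
 * debugfs access helpers (outline): configuration-space addresses go through
 * the register macros (refused while clock gating is enabled), SRAM goes
 * through its PCI BAR, DRAM/HBM is reached by temporarily re-pointing the HBM
 * BAR window at the target address and then restoring it, and host physical
 * addresses are accessed via phys_to_virt() when no IOMMU is present.
 */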
5484static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
5485{
5486 struct asic_fixed_properties *prop = &hdev->asic_prop;
5487 struct gaudi_device *gaudi = hdev->asic_specific;
5488 u64 hbm_bar_addr;
5489 int rc = 0;
5490
5491 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005492
5493 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5494 (hdev->clock_gating_mask &
5495 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5496
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005497 dev_err_ratelimited(hdev->dev,
5498 "Can't read register - clock gating is enabled!\n");
5499 rc = -EFAULT;
5500 } else {
5501 *val = RREG32(addr - CFG_BASE);
5502 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005503
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005504 } else if ((addr >= SRAM_BASE_ADDR) &&
5505 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5506 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
5507 (addr - SRAM_BASE_ADDR));
5508 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5509 u64 bar_base_addr = DRAM_PHYS_BASE +
5510 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5511
5512 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5513 if (hbm_bar_addr != U64_MAX) {
5514 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
5515 (addr - bar_base_addr));
5516
5517 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5518 hbm_bar_addr);
5519 }
5520 if (hbm_bar_addr == U64_MAX)
5521 rc = -EIO;
5522 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5523 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
5524 } else {
5525 rc = -EFAULT;
5526 }
5527
5528 return rc;
5529}
5530
5531static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
5532{
5533 struct asic_fixed_properties *prop = &hdev->asic_prop;
5534 struct gaudi_device *gaudi = hdev->asic_specific;
5535 u64 hbm_bar_addr;
5536 int rc = 0;
5537
5538 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005539
5540 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5541 (hdev->clock_gating_mask &
5542 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5543
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005544 dev_err_ratelimited(hdev->dev,
5545 "Can't write register - clock gating is enabled!\n");
5546 rc = -EFAULT;
5547 } else {
5548 WREG32(addr - CFG_BASE, val);
5549 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005550
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005551 } else if ((addr >= SRAM_BASE_ADDR) &&
5552 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5553 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
5554 (addr - SRAM_BASE_ADDR));
5555 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5556 u64 bar_base_addr = DRAM_PHYS_BASE +
5557 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5558
5559 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5560 if (hbm_bar_addr != U64_MAX) {
5561 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
5562 (addr - bar_base_addr));
5563
5564 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5565 hbm_bar_addr);
5566 }
5567 if (hbm_bar_addr == U64_MAX)
5568 rc = -EIO;
5569 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5570 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5571 } else {
5572 rc = -EFAULT;
5573 }
5574
5575 return rc;
5576}
5577
5578static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
5579{
5580 struct asic_fixed_properties *prop = &hdev->asic_prop;
5581 struct gaudi_device *gaudi = hdev->asic_specific;
5582 u64 hbm_bar_addr;
5583 int rc = 0;
5584
5585 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005586
5587 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5588 (hdev->clock_gating_mask &
5589 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5590
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005591 dev_err_ratelimited(hdev->dev,
5592 "Can't read register - clock gating is enabled!\n");
5593 rc = -EFAULT;
5594 } else {
5595 u32 val_l = RREG32(addr - CFG_BASE);
5596 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
5597
5598 *val = (((u64) val_h) << 32) | val_l;
5599 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005600
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005601 } else if ((addr >= SRAM_BASE_ADDR) &&
5602 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5603 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
5604 (addr - SRAM_BASE_ADDR));
5605 } else if (addr <=
5606 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5607 u64 bar_base_addr = DRAM_PHYS_BASE +
5608 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5609
5610 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5611 if (hbm_bar_addr != U64_MAX) {
5612 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
5613 (addr - bar_base_addr));
5614
5615 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5616 hbm_bar_addr);
5617 }
5618 if (hbm_bar_addr == U64_MAX)
5619 rc = -EIO;
5620 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5621 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
5622 } else {
5623 rc = -EFAULT;
5624 }
5625
5626 return rc;
5627}
5628
5629static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
5630{
5631 struct asic_fixed_properties *prop = &hdev->asic_prop;
5632 struct gaudi_device *gaudi = hdev->asic_specific;
5633 u64 hbm_bar_addr;
5634 int rc = 0;
5635
5636 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005637
5638 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5639 (hdev->clock_gating_mask &
5640 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5641
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005642 dev_err_ratelimited(hdev->dev,
5643 "Can't write register - clock gating is enabled!\n");
5644 rc = -EFAULT;
5645 } else {
5646 WREG32(addr - CFG_BASE, lower_32_bits(val));
5647 WREG32(addr + sizeof(u32) - CFG_BASE,
5648 upper_32_bits(val));
5649 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005650
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005651 } else if ((addr >= SRAM_BASE_ADDR) &&
5652 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5653 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
5654 (addr - SRAM_BASE_ADDR));
5655 } else if (addr <=
5656 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5657 u64 bar_base_addr = DRAM_PHYS_BASE +
5658 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5659
5660 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5661 if (hbm_bar_addr != U64_MAX) {
5662 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5663 (addr - bar_base_addr));
5664
5665 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5666 hbm_bar_addr);
5667 }
5668 if (hbm_bar_addr == U64_MAX)
5669 rc = -EIO;
5670 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5671 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5672 } else {
5673 rc = -EFAULT;
5674 }
5675
5676 return rc;
5677}
5678
5679static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
5680{
5681 struct gaudi_device *gaudi = hdev->asic_specific;
5682
5683 if (hdev->hard_reset_pending)
5684 return U64_MAX;
5685
5686 return readq(hdev->pcie_bar[HBM_BAR_ID] +
5687 (addr - gaudi->hbm_bar_cur_addr));
5688}
5689
5690static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
5691{
5692 struct gaudi_device *gaudi = hdev->asic_specific;
5693
5694 if (hdev->hard_reset_pending)
5695 return;
5696
5697 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5698 (addr - gaudi->hbm_bar_cur_addr));
5699}
5700
Ofir Bitton1137e1e2020-09-30 18:43:52 +03005701void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005702{
5703 /* mask to zero the MMBP and ASID bits */
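	/*
	 * The ASID presumably occupies the low bits of each of these
	 * registers with the MMU-bypass (MMBP) control just above it;
	 * clearing the low 11 bits and OR-ing in the new ASID makes the
	 * engine issue its transactions through the MMU on behalf of
	 * that ASID.
	 */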
5704 WREG32_AND(reg, ~0x7FF);
5705 WREG32_OR(reg, asid);
5706}
5707
5708static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
5709{
5710 struct gaudi_device *gaudi = hdev->asic_specific;
5711
5712 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5713 return;
5714
5715 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
5716 WARN(1, "asid %u is too big\n", asid);
5717 return;
5718 }
5719
5720 mutex_lock(&gaudi->clk_gate_mutex);
5721
5722 hdev->asic_funcs->disable_clock_gating(hdev);
5723
5724 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5725 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5726 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5727 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5728 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5729
5730 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
5731 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
5732 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
5733 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
5734 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
5735
5736 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5737 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5738 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5739 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5740 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5741
5742 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
5743 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
5744 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
5745 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
5746 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
5747
5748 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
5749 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
5750 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
5751 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
5752 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
5753
5754 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
5755 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
5756 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
5757 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
5758 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
5759
5760 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
5761 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
5762 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
5763 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
5764 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
5765
5766 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
5767 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
5768 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
5769 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
5770 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
5771
5772 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
5773 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
5774 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
5775 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
5776 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
5777 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
5778 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
5779 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
5780
5781 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5782 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5783 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5784 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5785 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5786 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
5787 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
5788
5789 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
5790 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
5791 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
5792 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
5793 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
5794 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
5795 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
5796
5797 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5798 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5799 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5800 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5801 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5802 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
5803 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
5804
5805 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
5806 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
5807 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
5808 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
5809 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
5810 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
5811 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
5812
5813 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
5814 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
5815 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
5816 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
5817 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
5818 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
5819 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
5820
5821 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
5822 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
5823 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
5824 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
5825 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
5826 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
5827 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
5828
5829 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
5830 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
5831 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
5832 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
5833 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
5834 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
5835 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
5836
5837 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
5838 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
5839 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
5840 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
5841 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
5842 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
5843 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
5844
5845 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5846 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5847 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5848 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5849 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5850 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5851 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5852 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5853 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5854 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5855
5856 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
5857 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
5858 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
5859 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
5860 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
5861 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
5862 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
5863 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
5864 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
5865 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
5866 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
5867 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
5868
Oded Gabbay3c681572020-11-02 21:10:39 +02005869 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
5870 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
5871 asid);
5872 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
5873 asid);
5874 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
5875 asid);
5876 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
5877 asid);
5878 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
5879 asid);
5880 }
5881
5882 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
5883 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
5884 asid);
5885 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
5886 asid);
5887 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
5888 asid);
5889 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
5890 asid);
5891 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
5892 asid);
5893 }
5894
5895 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
5896 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
5897 asid);
5898 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
5899 asid);
5900 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
5901 asid);
5902 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
5903 asid);
5904 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
5905 asid);
5906 }
5907
5908 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
5909 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
5910 asid);
5911 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
5912 asid);
5913 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
5914 asid);
5915 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
5916 asid);
5917 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
5918 asid);
5919 }
5920
5921 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
5922 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
5923 asid);
5924 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
5925 asid);
5926 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
5927 asid);
5928 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
5929 asid);
5930 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
5931 asid);
5932 }
5933
5934 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
5935 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
5936 asid);
5937 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
5938 asid);
5939 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
5940 asid);
5941 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
5942 asid);
5943 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
5944 asid);
5945 }
5946
5947 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
5948 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
5949 asid);
5950 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
5951 asid);
5952 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
5953 asid);
5954 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
5955 asid);
5956 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
5957 asid);
5958 }
5959
5960 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
5961 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
5962 asid);
5963 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
5964 asid);
5965 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
5966 asid);
5967 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
5968 asid);
5969 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
5970 asid);
5971 }
5972
5973 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
5974 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
5975 asid);
5976 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
5977 asid);
5978 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
5979 asid);
5980 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
5981 asid);
5982 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
5983 asid);
5984 }
5985
5986 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
5987 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
5988 asid);
5989 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
5990 asid);
5991 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
5992 asid);
5993 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
5994 asid);
5995 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
5996 asid);
5997 }
5998
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005999 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006000
6001 mutex_unlock(&gaudi->clk_gate_mutex);
6002}
6003
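/*
 * Submit a driver-generated job on QMAN0 (outline): a fence word is allocated
 * from the DMA pool, the job's trailing MSG_PROT packet is set up to write
 * GAUDI_QMAN0_FENCE_VAL to that word, the PCI DMA engine is temporarily
 * switched to secured mode via DMA0_CORE_PROT, the CB is sent with
 * hl_hw_queue_send_cb_no_cmpl(), and the driver then polls the fence until
 * the value shows up or the timeout expires.
 */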
6004static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6005 struct hl_cs_job *job)
6006{
6007 struct packet_msg_prot *fence_pkt;
6008 u32 *fence_ptr;
6009 dma_addr_t fence_dma_addr;
6010 struct hl_cb *cb;
6011 u32 tmp, timeout, dma_offset;
6012 int rc;
6013
6014 if (hdev->pldm)
6015 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6016 else
6017 timeout = HL_DEVICE_TIMEOUT_USEC;
6018
6019 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
6020 dev_err_ratelimited(hdev->dev,
6021 "Can't send driver job on QMAN0 because the device is not idle\n");
6022 return -EBUSY;
6023 }
6024
6025 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6026 &fence_dma_addr);
6027 if (!fence_ptr) {
6028 dev_err(hdev->dev,
6029 "Failed to allocate fence memory for QMAN0\n");
6030 return -ENOMEM;
6031 }
6032
6033 cb = job->patched_cb;
6034
Arnd Bergmann82948e62020-10-26 17:08:06 +01006035 fence_pkt = cb->kernel_address +
6036 job->job_cb_size - sizeof(struct packet_msg_prot);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006037
Oded Gabbay65887292020-08-12 11:21:01 +03006038 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6039 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6040 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6041
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006042 fence_pkt->ctl = cpu_to_le32(tmp);
6043 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6044 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6045
6046 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6047
6048 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6049
6050 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6051 job->job_cb_size, cb->bus_address);
6052 if (rc) {
6053 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6054 goto free_fence_ptr;
6055 }
6056
6057 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6058 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6059 timeout, true);
6060
6061 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6062
6063 if (rc == -ETIMEDOUT) {
6064 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6065 goto free_fence_ptr;
6066 }
6067
6068free_fence_ptr:
6069 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6070 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6071
6072 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6073 fence_dma_addr);
6074 return rc;
6075}
6076
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006077static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6078{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006079 if (event_type >= GAUDI_EVENT_SIZE)
6080 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006081
Ofir Bittonebd8d122020-05-10 13:41:28 +03006082 if (!gaudi_irq_map_table[event_type].valid)
6083 goto event_not_supported;
6084
6085 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6086
6087 return;
6088
6089event_not_supported:
6090 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006091}
6092
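/*
 * Each RAZWI initiator coordinate below is shared by a pair of DMA engines,
 * so the per-engine ERR_CAUSE registers are consulted to decide which of the
 * two actually triggered the event; when both (or neither) flag the matching
 * read/write error, the ambiguous "DMAx or DMAy" string is returned.
 */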
6093static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6094 u32 x_y, bool is_write)
6095{
6096 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6097
6098 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6099 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6100
6101 switch (x_y) {
6102 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6103 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6104 dma_id[0] = 0;
6105 dma_id[1] = 2;
6106 break;
6107 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6108 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6109 dma_id[0] = 1;
6110 dma_id[1] = 3;
6111 break;
6112 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6113 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6114 dma_id[0] = 4;
6115 dma_id[1] = 6;
6116 break;
6117 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6118 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6119 dma_id[0] = 5;
6120 dma_id[1] = 7;
6121 break;
6122 default:
6123 goto unknown_initiator;
6124 }
6125
6126 for (i = 0 ; i < 2 ; i++) {
6127 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6128 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6129 }
6130
6131 switch (x_y) {
6132 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6133 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6134 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6135 return "DMA0";
6136 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6137 return "DMA2";
6138 else
6139 return "DMA0 or DMA2";
6140 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6141 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6142 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6143 return "DMA1";
6144 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6145 return "DMA3";
6146 else
6147 return "DMA1 or DMA3";
6148 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6149 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6150 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6151 return "DMA4";
6152 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6153 return "DMA6";
6154 else
6155 return "DMA4 or DMA6";
6156 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6157 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6158 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6159 return "DMA5";
6160 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6161 return "DMA7";
6162 else
6163 return "DMA5 or DMA7";
6164 }
6165
6166unknown_initiator:
6167 return "unknown initiator";
6168}
6169
6170static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6171 bool is_write)
6172{
6173 u32 val, x_y, axi_id;
6174
6175 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6176 RREG32(mmMMU_UP_RAZWI_READ_ID);
6177 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6178 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6179 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6180 RAZWI_INITIATOR_AXI_ID_SHIFT);
6181
6182 switch (x_y) {
6183 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6184 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6185 return "TPC0";
6186 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6187 return "NIC0";
6188 break;
6189 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6190 return "TPC1";
6191 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6192 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6193 return "MME0";
6194 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6195 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6196 return "MME1";
6197 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6198 return "TPC2";
6199 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6200 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6201 return "TPC3";
6202 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6203 return "PCI";
6204 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6205 return "CPU";
6206 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6207 return "PSOC";
6208 break;
6209 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6210 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6211 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6212 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6213 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6214 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6215 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6216 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6217 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6218 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6219 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6220 return "TPC4";
6221 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6222 return "NIC1";
6223 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6224 return "NIC2";
6225 break;
6226 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6227 return "TPC5";
6228 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6229 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6230 return "MME2";
6231 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6232 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6233 return "MME3";
6234 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6235 return "TPC6";
6236 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6237 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6238 return "TPC7";
6239 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6240 return "NIC4";
6241 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6242 return "NIC5";
6243 break;
6244 default:
6245 break;
6246 }
6247
6248 dev_err(hdev->dev,
6249 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6250 val,
6251 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6252 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6253 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6254 RAZWI_INITIATOR_AXI_ID_MASK);
6255
6256 return "unknown initiator";
6257}
6258
6259static void gaudi_print_razwi_info(struct hl_device *hdev)
6260{
6261 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6262 dev_err_ratelimited(hdev->dev,
6263 "RAZWI event caused by illegal write of %s\n",
6264 gaudi_get_razwi_initiator_name(hdev, true));
6265 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6266 }
6267
6268 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6269 dev_err_ratelimited(hdev->dev,
6270 "RAZWI event caused by illegal read of %s\n",
6271 gaudi_get_razwi_initiator_name(hdev, false));
6272 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6273 }
6274}
6275
6276static void gaudi_print_mmu_error_info(struct hl_device *hdev)
6277{
6278 struct gaudi_device *gaudi = hdev->asic_specific;
6279 u64 addr;
6280 u32 val;
6281
6282 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6283 return;
6284
6285 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6286 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6287 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6288 addr <<= 32;
6289 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6290
6291 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
6292 addr);
6293
6294 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6295 }
6296
6297 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6298 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6299 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6300 addr <<= 32;
6301 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6302
6303 dev_err_ratelimited(hdev->dev,
6304 "MMU access error on va 0x%llx\n", addr);
6305
6306 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6307 }
6308}
6309
6310/*
6311 * +-------------------+------------------------------------------------------+
6312 * | Configuration Reg | Description |
6313 * | Address | |
6314 * +-------------------+------------------------------------------------------+
6315 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6316 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6317 * | |0xF34 memory wrappers 63:32 |
6318 * | |0xF38 memory wrappers 95:64 |
6319 * | |0xF3C memory wrappers 127:96 |
6320 * +-------------------+------------------------------------------------------+
6321 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6322 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6323 * | |0xF44 memory wrappers 63:32 |
6324 * | |0xF48 memory wrappers 95:64 |
6325 * | |0xF4C memory wrappers 127:96 |
6326 * +-------------------+------------------------------------------------------+
6327 */
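/*
 * Worked example (hypothetical numbers): for a block reporting 90 memory
 * wrappers, num_mem_regs below is 3. If bit 5 of the second indication
 * register is the one that is set, the failing wrapper index is
 * 5 + 32 * 1 = 37; that index is written to the MEM_SEL register, the error
 * address and syndrome are read back, and the indication is then cleared.
 */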
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006328static int gaudi_extract_ecc_info(struct hl_device *hdev,
6329 struct ecc_info_extract_params *params, u64 *ecc_address,
6330 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006331{
6332 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006333 u32 i, num_mem_regs, reg, err_bit;
6334 u64 err_addr, err_word = 0;
6335 int rc = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006336
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006337 num_mem_regs = params->num_memories / 32 +
6338 ((params->num_memories % 32) ? 1 : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006339
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006340 if (params->block_address >= CFG_BASE)
6341 params->block_address -= CFG_BASE;
6342
6343 if (params->derr)
6344 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006345 else
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006346 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006347
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006348 if (params->disable_clock_gating) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006349 mutex_lock(&gaudi->clk_gate_mutex);
6350 hdev->asic_funcs->disable_clock_gating(hdev);
6351 }
6352
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006353 /* Set invalid wrapper index */
6354 *memory_wrapper_idx = 0xFF;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006355
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006356 /* Iterate through memory wrappers, a single bit must be set */
Dan Carpenterb0353542020-08-05 12:51:05 +03006357 for (i = 0 ; i < num_mem_regs ; i++) {
6358			/* Indication registers are consecutive; index from the base each time */
6359			err_word = RREG32(err_addr + i * 4);
6360 if (err_word) {
6361 err_bit = __ffs(err_word);
6362 *memory_wrapper_idx = err_bit + (32 * i);
6363 break;
6364 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006365 }
6366
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006367 if (*memory_wrapper_idx == 0xFF) {
6368 dev_err(hdev->dev, "ECC error information cannot be found\n");
6369 rc = -EINVAL;
6370 goto enable_clk_gate;
6371 }
6372
6373 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6374 *memory_wrapper_idx);
6375
6376 *ecc_address =
6377 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6378 *ecc_syndrom =
6379 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6380
6381 /* Clear error indication */
6382 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6383 if (params->derr)
6384 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6385 else
6386 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6387
6388 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6389
6390enable_clk_gate:
6391 if (params->disable_clock_gating) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006392 hdev->asic_funcs->set_clock_gating(hdev);
Greg Kroah-Hartman65a9bde62020-07-27 11:49:37 +02006393
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006394 mutex_unlock(&gaudi->clk_gate_mutex);
6395 }
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006396
6397 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006398}
6399
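/* Decode and clear generic QMAN errors: there is one GLBL_STS1 register per
 * stream plus one for the lower CP; every set bit indexes
 * gaudi_qman_error_cause[] and is cleared by writing it back. The ARB_ERR
 * cause register is then decoded the same way via
 * gaudi_qman_arb_error_cause[] (it is report-only here).
 */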
6400static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6401 const char *qm_name,
6402 u64 glbl_sts_addr,
6403 u64 arb_err_addr)
6404{
6405 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6406 char reg_desc[32];
6407
6408 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6409 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6410 glbl_sts_clr_val = 0;
6411 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6412
6413 if (!glbl_sts_val)
6414 continue;
6415
6416 if (i == QMAN_STREAMS)
6417 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6418 else
6419 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6420
6421 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6422 if (glbl_sts_val & BIT(j)) {
6423 dev_err_ratelimited(hdev->dev,
6424 "%s %s. err cause: %s\n",
6425 qm_name, reg_desc,
6426 gaudi_qman_error_cause[j]);
6427 glbl_sts_clr_val |= BIT(j);
6428 }
6429 }
6430
6431		/* Write 1 to clear errors */
6432 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6433 }
6434
6435 arb_err_val = RREG32(arb_err_addr);
6436
6437 if (!arb_err_val)
6438 return;
6439
6440 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6441 if (arb_err_val & BIT(j)) {
6442 dev_err_ratelimited(hdev->dev,
6443 "%s ARB_ERR. err cause: %s\n",
6444 qm_name,
6445 gaudi_qman_arb_error_cause[j]);
6446 }
6447 }
6448}
6449
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006450static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6451 struct hl_eq_ecc_data *ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006452{
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006453 struct ecc_info_extract_params params;
6454 u64 ecc_address = 0, ecc_syndrom = 0;
6455 u8 index, memory_wrapper_idx = 0;
6456 bool extract_info_from_fw;
6457 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006458
6459 switch (event_type) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006460 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
6461 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
6462 extract_info_from_fw = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006463 break;
6464 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
6465 index = event_type - GAUDI_EVENT_TPC0_SERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006466 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
6467 params.num_memories = 90;
6468 params.derr = false;
6469 params.disable_clock_gating = true;
6470 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006471 break;
6472 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
6473 index = event_type - GAUDI_EVENT_TPC0_DERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006474 params.block_address =
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006475 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006476 params.num_memories = 90;
6477 params.derr = true;
6478 params.disable_clock_gating = true;
6479 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006480 break;
6481 case GAUDI_EVENT_MME0_ACC_SERR:
6482 case GAUDI_EVENT_MME1_ACC_SERR:
6483 case GAUDI_EVENT_MME2_ACC_SERR:
6484 case GAUDI_EVENT_MME3_ACC_SERR:
6485 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006486 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6487 params.num_memories = 128;
6488 params.derr = false;
6489 params.disable_clock_gating = true;
6490 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006491 break;
6492 case GAUDI_EVENT_MME0_ACC_DERR:
6493 case GAUDI_EVENT_MME1_ACC_DERR:
6494 case GAUDI_EVENT_MME2_ACC_DERR:
6495 case GAUDI_EVENT_MME3_ACC_DERR:
6496 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006497 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6498 params.num_memories = 128;
6499 params.derr = true;
6500 params.disable_clock_gating = true;
6501 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006502 break;
6503 case GAUDI_EVENT_MME0_SBAB_SERR:
6504 case GAUDI_EVENT_MME1_SBAB_SERR:
6505 case GAUDI_EVENT_MME2_SBAB_SERR:
6506 case GAUDI_EVENT_MME3_SBAB_SERR:
6507 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006508 params.block_address =
6509 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6510 params.num_memories = 33;
6511 params.derr = false;
6512 params.disable_clock_gating = true;
6513 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006514 break;
6515 case GAUDI_EVENT_MME0_SBAB_DERR:
6516 case GAUDI_EVENT_MME1_SBAB_DERR:
6517 case GAUDI_EVENT_MME2_SBAB_DERR:
6518 case GAUDI_EVENT_MME3_SBAB_DERR:
6519 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006520 params.block_address =
6521 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6522 params.num_memories = 33;
6523 params.derr = true;
6524 params.disable_clock_gating = true;
Oded Gabbay652b4442020-11-21 14:35:35 +02006525 extract_info_from_fw = false;
6526 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006527 default:
6528 return;
6529 }
6530
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006531 if (extract_info_from_fw) {
6532 ecc_address = le64_to_cpu(ecc_data->ecc_address);
6533 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6534 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6535 } else {
6536 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
6537 &ecc_syndrom, &memory_wrapper_idx);
6538 if (rc)
6539 return;
6540 }
6541
6542 dev_err(hdev->dev,
6543		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
6544 ecc_address, ecc_syndrom, memory_wrapper_idx);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006545}
6546
6547static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
6548{
6549 u64 glbl_sts_addr, arb_err_addr;
6550 u8 index;
6551 char desc[32];
6552
6553 switch (event_type) {
6554 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
6555 index = event_type - GAUDI_EVENT_TPC0_QM;
6556 glbl_sts_addr =
6557 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
6558 arb_err_addr =
6559 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
6560 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
6561 break;
6562 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
6563 index = event_type - GAUDI_EVENT_MME0_QM;
6564 glbl_sts_addr =
6565 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
6566 arb_err_addr =
6567 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
6568 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
6569 break;
6570 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
6571 index = event_type - GAUDI_EVENT_DMA0_QM;
6572 glbl_sts_addr =
6573 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
6574 arb_err_addr =
6575 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
6576 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
6577 break;
Oded Gabbay3c681572020-11-02 21:10:39 +02006578 case GAUDI_EVENT_NIC0_QM0:
6579 glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
6580 arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
6581 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
6582 break;
6583 case GAUDI_EVENT_NIC0_QM1:
6584 glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
6585 arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
6586 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
6587 break;
6588 case GAUDI_EVENT_NIC1_QM0:
6589 glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
6590 arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
6591 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
6592 break;
6593 case GAUDI_EVENT_NIC1_QM1:
6594 glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
6595 arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
6596 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
6597 break;
6598 case GAUDI_EVENT_NIC2_QM0:
6599 glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
6600 arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
6601 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
6602 break;
6603 case GAUDI_EVENT_NIC2_QM1:
6604 glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
6605 arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
6606 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
6607 break;
6608 case GAUDI_EVENT_NIC3_QM0:
6609 glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
6610 arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
6611 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
6612 break;
6613 case GAUDI_EVENT_NIC3_QM1:
6614 glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
6615 arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
6616 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
6617 break;
6618 case GAUDI_EVENT_NIC4_QM0:
6619 glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
6620 arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
6621 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
6622 break;
6623 case GAUDI_EVENT_NIC4_QM1:
6624 glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
6625 arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
6626 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
6627 break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006628 default:
6629 return;
6630 }
6631
6632 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
6633}
6634
6635static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
6636 bool razwi)
6637{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006638 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006639
6640 gaudi_get_event_desc(event_type, desc, sizeof(desc));
6641 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6642 event_type, desc);
6643
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006644 if (razwi) {
6645 gaudi_print_razwi_info(hdev);
6646 gaudi_print_mmu_error_info(hdev);
6647 }
6648}
6649
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006650static int gaudi_soft_reset_late_init(struct hl_device *hdev)
6651{
Ofir Bittonebd8d122020-05-10 13:41:28 +03006652 struct gaudi_device *gaudi = hdev->asic_specific;
6653
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006654 /* Unmask all IRQs since some could have been received
6655 * during the soft reset
6656 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03006657 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006658}
6659
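/* Scan one HBM device: each of the GAUDI_HBM_CHANNELS register blocks sits at
 * a 0x1000 stride and covers a pair of pseudo channels. Offsets 0x06C/0x07C
 * latch the even/odd pseudo channel interrupt bits (decoded below as
 * WR_PAR/RD_PAR/CA_PAR/SERR/DERR), offsets 0x060-0x074 hold the matching ECC
 * counters and first-error address, and 0x8F30-0x8F44 report MC SRAM
 * SERR/DERR. The per-channel indications are cleared before returning.
 */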
6660static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
6661{
6662 int ch, err = 0;
6663 u32 base, val, val2;
6664
6665 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
6666 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
6667 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
6668 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6669 if (val) {
6670 err = 1;
6671 dev_err(hdev->dev,
6672 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6673 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
6674 (val >> 2) & 0x1, (val >> 3) & 0x1,
6675 (val >> 4) & 0x1);
6676
6677 val2 = RREG32(base + ch * 0x1000 + 0x060);
6678 dev_err(hdev->dev,
6679 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
6680 device, ch * 2,
6681 RREG32(base + ch * 0x1000 + 0x064),
6682 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6683 (val2 & 0xFF0000) >> 16,
6684 (val2 & 0xFF000000) >> 24);
6685 }
6686
6687 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
6688 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6689 if (val) {
6690 err = 1;
6691 dev_err(hdev->dev,
6692 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6693 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
6694 (val >> 2) & 0x1, (val >> 3) & 0x1,
6695 (val >> 4) & 0x1);
6696
6697 val2 = RREG32(base + ch * 0x1000 + 0x070);
6698 dev_err(hdev->dev,
6699 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
6700 device, ch * 2 + 1,
6701 RREG32(base + ch * 0x1000 + 0x074),
6702 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6703 (val2 & 0xFF0000) >> 16,
6704 (val2 & 0xFF000000) >> 24);
6705 }
6706
6707 /* Clear interrupts */
6708 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
6709 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
6710 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
6711 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
6712 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
6713 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
6714 }
6715
6716 val = RREG32(base + 0x8F30);
6717 val2 = RREG32(base + 0x8F34);
6718 if (val | val2) {
6719 err = 1;
6720 dev_err(hdev->dev,
6721 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
6722 device, val, val2);
6723 }
6724 val = RREG32(base + 0x8F40);
6725 val2 = RREG32(base + 0x8F44);
6726 if (val | val2) {
6727 err = 1;
6728 dev_err(hdev->dev,
6729 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
6730 device, val, val2);
6731 }
6732
6733 return err;
6734}
6735
6736static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
6737{
6738 switch (hbm_event_type) {
6739 case GAUDI_EVENT_HBM0_SPI_0:
6740 case GAUDI_EVENT_HBM0_SPI_1:
6741 return 0;
6742 case GAUDI_EVENT_HBM1_SPI_0:
6743 case GAUDI_EVENT_HBM1_SPI_1:
6744 return 1;
6745 case GAUDI_EVENT_HBM2_SPI_0:
6746 case GAUDI_EVENT_HBM2_SPI_1:
6747 return 2;
6748 case GAUDI_EVENT_HBM3_SPI_0:
6749 case GAUDI_EVENT_HBM3_SPI_1:
6750 return 3;
6751 default:
6752 break;
6753 }
6754
6755 /* Should never happen */
6756 return 0;
6757}
6758
6759static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
6760 char *interrupt_name)
6761{
6762 struct gaudi_device *gaudi = hdev->asic_specific;
6763 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
6764 bool soft_reset_required = false;
6765
6766 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
Oded Gabbay6138bbe2020-09-04 20:18:16 +03006767 * gating, and thus cannot be done in CPU-CP and should be done instead
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006768 * by the driver.
6769 */
6770
6771 mutex_lock(&gaudi->clk_gate_mutex);
6772
6773 hdev->asic_funcs->disable_clock_gating(hdev);
6774
6775 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
6776 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
6777
6778 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
6779 if (tpc_interrupts_cause & BIT(i)) {
6780 dev_err_ratelimited(hdev->dev,
6781 "TPC%d_%s interrupt cause: %s\n",
6782 tpc_id, interrupt_name,
6783 gaudi_tpc_interrupts_cause[i]);
6784			/* If this is a QM error, we need to soft-reset */
6785 if (i == 15)
6786 soft_reset_required = true;
6787 }
6788
6789 /* Clear interrupts */
6790 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
6791
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006792 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006793
6794 mutex_unlock(&gaudi->clk_gate_mutex);
6795
6796 return soft_reset_required;
6797}
6798
6799static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
6800{
6801 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
6802}
6803
6804static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
6805{
6806 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
6807}
6808
6809static void gaudi_print_clk_change_info(struct hl_device *hdev,
6810 u16 event_type)
6811{
6812 switch (event_type) {
6813 case GAUDI_EVENT_FIX_POWER_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03006814 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006815 dev_info_ratelimited(hdev->dev,
6816 "Clock throttling due to power consumption\n");
6817 break;
6818
6819 case GAUDI_EVENT_FIX_POWER_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03006820 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006821 dev_info_ratelimited(hdev->dev,
6822			"Power envelope is safe, back to optimal clock\n");
6823 break;
6824
6825 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03006826 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006827 dev_info_ratelimited(hdev->dev,
6828 "Clock throttling due to overheating\n");
6829 break;
6830
6831 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03006832 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006833 dev_info_ratelimited(hdev->dev,
6834			"Thermal envelope is safe, back to optimal clock\n");
6835 break;
6836
6837 default:
6838 dev_err(hdev->dev, "Received invalid clock change event %d\n",
6839 event_type);
6840 break;
6841 }
6842}
6843
6844static void gaudi_handle_eqe(struct hl_device *hdev,
6845 struct hl_eq_entry *eq_entry)
6846{
6847 struct gaudi_device *gaudi = hdev->asic_specific;
6848 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
6849 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
6850 >> EQ_CTL_EVENT_TYPE_SHIFT);
6851 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03006852 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006853
6854 gaudi->events_stat[event_type]++;
6855 gaudi->events_stat_aggregate[event_type]++;
6856
6857 switch (event_type) {
6858 case GAUDI_EVENT_PCIE_CORE_DERR:
6859 case GAUDI_EVENT_PCIE_IF_DERR:
6860 case GAUDI_EVENT_PCIE_PHY_DERR:
6861 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
6862 case GAUDI_EVENT_MME0_ACC_DERR:
6863 case GAUDI_EVENT_MME0_SBAB_DERR:
6864 case GAUDI_EVENT_MME1_ACC_DERR:
6865 case GAUDI_EVENT_MME1_SBAB_DERR:
6866 case GAUDI_EVENT_MME2_ACC_DERR:
6867 case GAUDI_EVENT_MME2_SBAB_DERR:
6868 case GAUDI_EVENT_MME3_ACC_DERR:
6869 case GAUDI_EVENT_MME3_SBAB_DERR:
6870 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
6871 fallthrough;
6872 case GAUDI_EVENT_CPU_IF_ECC_DERR:
6873 case GAUDI_EVENT_PSOC_MEM_DERR:
6874 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
6875 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
6876 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006877 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
6878 case GAUDI_EVENT_MMU_DERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006879 gaudi_print_irq_info(hdev, event_type, true);
6880 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
6881 if (hdev->hard_reset_on_fw_events)
6882 hl_device_reset(hdev, true, false);
6883 break;
6884
6885 case GAUDI_EVENT_GIC500:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006886 case GAUDI_EVENT_AXI_ECC:
6887 case GAUDI_EVENT_L2_RAM_ECC:
6888 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
6889 gaudi_print_irq_info(hdev, event_type, false);
6890 if (hdev->hard_reset_on_fw_events)
6891 hl_device_reset(hdev, true, false);
6892 break;
6893
6894 case GAUDI_EVENT_HBM0_SPI_0:
6895 case GAUDI_EVENT_HBM1_SPI_0:
6896 case GAUDI_EVENT_HBM2_SPI_0:
6897 case GAUDI_EVENT_HBM3_SPI_0:
6898 gaudi_print_irq_info(hdev, event_type, false);
6899 gaudi_hbm_read_interrupts(hdev,
6900 gaudi_hbm_event_to_dev(event_type));
6901 if (hdev->hard_reset_on_fw_events)
6902 hl_device_reset(hdev, true, false);
6903 break;
6904
6905 case GAUDI_EVENT_HBM0_SPI_1:
6906 case GAUDI_EVENT_HBM1_SPI_1:
6907 case GAUDI_EVENT_HBM2_SPI_1:
6908 case GAUDI_EVENT_HBM3_SPI_1:
6909 gaudi_print_irq_info(hdev, event_type, false);
6910 gaudi_hbm_read_interrupts(hdev,
6911 gaudi_hbm_event_to_dev(event_type));
6912 break;
6913
6914 case GAUDI_EVENT_TPC0_DEC:
6915 case GAUDI_EVENT_TPC1_DEC:
6916 case GAUDI_EVENT_TPC2_DEC:
6917 case GAUDI_EVENT_TPC3_DEC:
6918 case GAUDI_EVENT_TPC4_DEC:
6919 case GAUDI_EVENT_TPC5_DEC:
6920 case GAUDI_EVENT_TPC6_DEC:
6921 case GAUDI_EVENT_TPC7_DEC:
6922 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03006923 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006924 tpc_dec_event_to_tpc_id(event_type),
6925 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03006926 if (reset_required) {
6927 dev_err(hdev->dev, "hard reset required due to %s\n",
6928 gaudi_irq_map_table[event_type].name);
6929
6930 if (hdev->hard_reset_on_fw_events)
6931 hl_device_reset(hdev, true, false);
6932 } else {
6933 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03006934 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006935 break;
6936
6937 case GAUDI_EVENT_TPC0_KRN_ERR:
6938 case GAUDI_EVENT_TPC1_KRN_ERR:
6939 case GAUDI_EVENT_TPC2_KRN_ERR:
6940 case GAUDI_EVENT_TPC3_KRN_ERR:
6941 case GAUDI_EVENT_TPC4_KRN_ERR:
6942 case GAUDI_EVENT_TPC5_KRN_ERR:
6943 case GAUDI_EVENT_TPC6_KRN_ERR:
6944 case GAUDI_EVENT_TPC7_KRN_ERR:
6945 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03006946 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006947 tpc_krn_event_to_tpc_id(event_type),
6948 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03006949 if (reset_required) {
6950 dev_err(hdev->dev, "hard reset required due to %s\n",
6951 gaudi_irq_map_table[event_type].name);
6952
6953 if (hdev->hard_reset_on_fw_events)
6954 hl_device_reset(hdev, true, false);
6955 } else {
6956 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03006957 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006958 break;
6959
6960 case GAUDI_EVENT_PCIE_CORE_SERR:
6961 case GAUDI_EVENT_PCIE_IF_SERR:
6962 case GAUDI_EVENT_PCIE_PHY_SERR:
6963 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
6964 case GAUDI_EVENT_MME0_ACC_SERR:
6965 case GAUDI_EVENT_MME0_SBAB_SERR:
6966 case GAUDI_EVENT_MME1_ACC_SERR:
6967 case GAUDI_EVENT_MME1_SBAB_SERR:
6968 case GAUDI_EVENT_MME2_ACC_SERR:
6969 case GAUDI_EVENT_MME2_SBAB_SERR:
6970 case GAUDI_EVENT_MME3_ACC_SERR:
6971 case GAUDI_EVENT_MME3_SBAB_SERR:
6972 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
6973 case GAUDI_EVENT_CPU_IF_ECC_SERR:
6974 case GAUDI_EVENT_PSOC_MEM_SERR:
6975 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
6976 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
6977 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
6978 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
6979 fallthrough;
6980 case GAUDI_EVENT_MMU_SERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03006981 gaudi_print_irq_info(hdev, event_type, true);
6982 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
6983 hl_fw_unmask_irq(hdev, event_type);
6984 break;
6985
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006986 case GAUDI_EVENT_PCIE_DEC:
6987 case GAUDI_EVENT_MME0_WBC_RSP:
6988 case GAUDI_EVENT_MME0_SBAB0_RSP:
6989 case GAUDI_EVENT_MME1_WBC_RSP:
6990 case GAUDI_EVENT_MME1_SBAB0_RSP:
6991 case GAUDI_EVENT_MME2_WBC_RSP:
6992 case GAUDI_EVENT_MME2_SBAB0_RSP:
6993 case GAUDI_EVENT_MME3_WBC_RSP:
6994 case GAUDI_EVENT_MME3_SBAB0_RSP:
6995 case GAUDI_EVENT_CPU_AXI_SPLITTER:
6996 case GAUDI_EVENT_PSOC_AXI_DEC:
6997 case GAUDI_EVENT_PSOC_PRSTN_FALL:
6998 case GAUDI_EVENT_MMU_PAGE_FAULT:
6999 case GAUDI_EVENT_MMU_WR_PERM:
7000 case GAUDI_EVENT_RAZWI_OR_ADC:
7001 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7002 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7003 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7004 fallthrough;
Oded Gabbay3c681572020-11-02 21:10:39 +02007005 case GAUDI_EVENT_NIC0_QM0:
7006 case GAUDI_EVENT_NIC0_QM1:
7007 case GAUDI_EVENT_NIC1_QM0:
7008 case GAUDI_EVENT_NIC1_QM1:
7009 case GAUDI_EVENT_NIC2_QM0:
7010 case GAUDI_EVENT_NIC2_QM1:
7011 case GAUDI_EVENT_NIC3_QM0:
7012 case GAUDI_EVENT_NIC3_QM1:
7013 case GAUDI_EVENT_NIC4_QM0:
7014 case GAUDI_EVENT_NIC4_QM1:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007015 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7016 gaudi_print_irq_info(hdev, event_type, true);
7017 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007018 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007019 break;
7020
7021 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7022 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03007023 if (hdev->hard_reset_on_fw_events)
7024 hl_device_reset(hdev, true, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007025 break;
7026
7027 case GAUDI_EVENT_TPC0_BMON_SPMU:
7028 case GAUDI_EVENT_TPC1_BMON_SPMU:
7029 case GAUDI_EVENT_TPC2_BMON_SPMU:
7030 case GAUDI_EVENT_TPC3_BMON_SPMU:
7031 case GAUDI_EVENT_TPC4_BMON_SPMU:
7032 case GAUDI_EVENT_TPC5_BMON_SPMU:
7033 case GAUDI_EVENT_TPC6_BMON_SPMU:
7034 case GAUDI_EVENT_TPC7_BMON_SPMU:
7035 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7036 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007037 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007038 break;
7039
7040 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7041 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03007042 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007043 break;
7044
7045 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7046 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7047 dev_err(hdev->dev,
7048 "Received high temp H/W interrupt %d (cause %d)\n",
7049 event_type, cause);
7050 break;
7051
7052 default:
7053 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7054 event_type);
7055 break;
7056 }
7057}
7058
7059static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
7060 u32 *size)
7061{
7062 struct gaudi_device *gaudi = hdev->asic_specific;
7063
7064 if (aggregate) {
7065 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7066 return gaudi->events_stat_aggregate;
7067 }
7068
7069 *size = (u32) sizeof(gaudi->events_stat);
7070 return gaudi->events_stat;
7071}
7072
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007073static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007074 u32 flags)
7075{
7076 struct gaudi_device *gaudi = hdev->asic_specific;
7077 u32 status, timeout_usec;
7078 int rc;
7079
7080 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7081 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007082 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007083
7084 if (hdev->pldm)
7085 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7086 else
7087 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7088
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007089 mutex_lock(&hdev->mmu_cache_lock);
7090
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007091 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03007092 WREG32(mmSTLB_INV_PS, 3);
7093 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007094 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007095
7096 rc = hl_poll_timeout(
7097 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007098 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007099 status,
7100 !status,
7101 1000,
7102 timeout_usec);
7103
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03007104 WREG32(mmSTLB_INV_SET, 0);
7105
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007106 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007107
7108 if (rc) {
7109 dev_err_ratelimited(hdev->dev,
7110 "MMU cache invalidation timeout\n");
7111 hl_device_reset(hdev, true, false);
7112 }
7113
7114 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007115}
7116
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007117static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007118 bool is_hard, u32 asid, u64 va, u64 size)
7119{
7120 struct gaudi_device *gaudi = hdev->asic_specific;
7121 u32 status, timeout_usec;
7122 u32 inv_data;
7123 u32 pi;
7124 int rc;
7125
7126 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7127 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007128 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007129
7130 mutex_lock(&hdev->mmu_cache_lock);
7131
7132 if (hdev->pldm)
7133 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7134 else
7135 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7136
7137 /*
7138 * TODO: currently invalidate entire L0 & L1 as in regular hard
7139 * invalidation. Need to apply invalidation of specific cache
7140 * lines with mask of ASID & VA & size.
7141	 * Note that L1 will be flushed entirely in any case.
7142 */
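	/* The flow below reuses the full-cache invalidation interface: read
	 * STLB_CACHE_INV, advance the 8-bit producer index while preserving the
	 * index-mask field, then poll STLB_INV_CONSUMER_INDEX until the
	 * consumer index reaches the new producer value.
	 */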
7143
7144 /* L0 & L1 invalidation */
7145 inv_data = RREG32(mmSTLB_CACHE_INV);
7146 /* PI is 8 bit */
7147 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
7148 WREG32(mmSTLB_CACHE_INV,
7149 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
7150
7151 rc = hl_poll_timeout(
7152 hdev,
7153 mmSTLB_INV_CONSUMER_INDEX,
7154 status,
7155 status == pi,
7156 1000,
7157 timeout_usec);
7158
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007159 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03007160
7161 if (rc) {
7162 dev_err_ratelimited(hdev->dev,
7163 "MMU cache invalidation timeout\n");
7164 hl_device_reset(hdev, true, false);
7165 }
7166
7167 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007168}
7169
7170static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
7171 u32 asid, u64 phys_addr)
7172{
7173 u32 status, timeout_usec;
7174 int rc;
7175
7176 if (hdev->pldm)
7177 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7178 else
7179 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7180
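	/* The hop0 physical address is split across two registers: bits 43:12
	 * are written to MMU_HOP0_PA43_12 and bits 49:44 to MMU_HOP0_PA49_44.
	 * As a hypothetical example, a hop0 table at 0x2000001000 would program
	 * 0x2000001 and 0x0 respectively. Writing MMU_BUSY starts the update,
	 * which is complete once bit 31 clears.
	 */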
7181 WREG32(MMU_ASID, asid);
7182 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7183 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7184 WREG32(MMU_BUSY, 0x80000000);
7185
7186 rc = hl_poll_timeout(
7187 hdev,
7188 MMU_BUSY,
7189 status,
7190 !(status & 0x80000000),
7191 1000,
7192 timeout_usec);
7193
7194 if (rc) {
7195 dev_err(hdev->dev,
7196 "Timeout during MMU hop0 config of asid %d\n", asid);
7197 return rc;
7198 }
7199
7200 return 0;
7201}
7202
7203static int gaudi_send_heartbeat(struct hl_device *hdev)
7204{
7205 struct gaudi_device *gaudi = hdev->asic_specific;
7206
7207 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7208 return 0;
7209
7210 return hl_fw_send_heartbeat(hdev);
7211}
7212
Oded Gabbay2f553422020-08-15 16:28:10 +03007213static int gaudi_cpucp_info_get(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007214{
7215 struct gaudi_device *gaudi = hdev->asic_specific;
7216 struct asic_fixed_properties *prop = &hdev->asic_prop;
7217 int rc;
7218
7219 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7220 return 0;
7221
Oded Gabbay2f553422020-08-15 16:28:10 +03007222 rc = hl_fw_cpucp_info_get(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007223 if (rc)
7224 return rc;
7225
Oded Gabbay2f553422020-08-15 16:28:10 +03007226 if (!strlen(prop->cpucp_info.card_name))
7227 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007228 CARD_NAME_MAX_LEN);
7229
Oded Gabbay2f553422020-08-15 16:28:10 +03007230 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
Oded Gabbay58361aa2020-08-08 23:34:47 +03007231
Oded Gabbay2f553422020-08-15 16:28:10 +03007232 if (hdev->card_type == cpucp_card_type_pci)
Oded Gabbay58361aa2020-08-08 23:34:47 +03007233 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
Oded Gabbay2f553422020-08-15 16:28:10 +03007234 else if (hdev->card_type == cpucp_card_type_pmc)
Oded Gabbay58361aa2020-08-08 23:34:47 +03007235 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
7236
7237 hdev->max_power = prop->max_power_default;
7238
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007239 return 0;
7240}
7241
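/* Idle check: every DMA/TPC/MME/NIC engine is inspected via its QM_GLBL_STS0,
 * QM_CGM_STS and engine-specific status registers. A busy engine clears the
 * overall result and, when a mask pointer is supplied, sets the bit matching
 * its GAUDI_ENGINE_ID_* so callers can tell which engine is stuck. When a
 * seq_file is supplied, a per-engine status table is printed as well.
 */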
farah kassabrid90416c2020-08-12 17:20:13 +03007242static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007243 struct seq_file *s)
7244{
7245 struct gaudi_device *gaudi = hdev->asic_specific;
7246 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
7247 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
Oded Gabbay3c681572020-11-02 21:10:39 +02007248 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007249 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
7250 bool is_idle = true, is_eng_idle, is_slave;
7251 u64 offset;
Oded Gabbay3c681572020-11-02 21:10:39 +02007252 int i, dma_id, port;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007253
7254 mutex_lock(&gaudi->clk_gate_mutex);
7255
7256 hdev->asic_funcs->disable_clock_gating(hdev);
7257
7258 if (s)
7259 seq_puts(s,
7260 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
7261 "--- ------- ------------ ---------- -------------\n");
7262
7263 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
7264 dma_id = gaudi_dma_assignment[i];
7265 offset = dma_id * DMA_QMAN_OFFSET;
7266
7267 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
7268 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
7269 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
7270 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7271 IS_DMA_IDLE(dma_core_sts0);
7272 is_idle &= is_eng_idle;
7273
7274 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03007275 *mask |= ((u64) !is_eng_idle) <<
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007276 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
7277 if (s)
7278 seq_printf(s, fmt, dma_id,
7279 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
7280 qm_cgm_sts, dma_core_sts0);
7281 }
7282
7283 if (s)
7284 seq_puts(s,
7285 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
7286 "--- ------- ------------ ---------- ----------\n");
7287
7288 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
7289 offset = i * TPC_QMAN_OFFSET;
7290 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
7291 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
7292 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
7293 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7294 IS_TPC_IDLE(tpc_cfg_sts);
7295 is_idle &= is_eng_idle;
7296
7297 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03007298 *mask |= ((u64) !is_eng_idle) <<
7299 (GAUDI_ENGINE_ID_TPC_0 + i);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007300 if (s)
7301 seq_printf(s, fmt, i,
7302 is_eng_idle ? "Y" : "N",
7303 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7304 }
7305
7306 if (s)
7307 seq_puts(s,
7308 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
7309 "--- ------- ------------ ---------- -----------\n");
7310
7311 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
7312 offset = i * MME_QMAN_OFFSET;
7313 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
7314 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
7315
7316 /* MME 1 & 3 are slaves, no need to check their QMANs */
7317 is_slave = i % 2;
7318 if (!is_slave) {
7319 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
7320 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
7321 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7322 }
7323
7324 is_idle &= is_eng_idle;
7325
7326 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03007327 *mask |= ((u64) !is_eng_idle) <<
7328 (GAUDI_ENGINE_ID_MME_0 + i);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007329 if (s) {
7330 if (!is_slave)
7331 seq_printf(s, fmt, i,
7332 is_eng_idle ? "Y" : "N",
7333 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
7334 else
7335 seq_printf(s, mme_slave_fmt, i,
7336 is_eng_idle ? "Y" : "N", "-",
7337 "-", mme_arch_sts);
7338 }
7339 }
7340
7341 if (s)
Oded Gabbay3c681572020-11-02 21:10:39 +02007342 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
7343 "--- ------- ------------ ----------\n");
7344
7345 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
7346 offset = i * NIC_MACRO_QMAN_OFFSET;
7347 port = 2 * i;
7348 if (hdev->nic_ports_mask & BIT(port)) {
7349 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7350 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7351 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7352 is_idle &= is_eng_idle;
7353
7354 if (mask)
7355 *mask |= ((u64) !is_eng_idle) <<
7356 (GAUDI_ENGINE_ID_NIC_0 + port);
7357 if (s)
7358 seq_printf(s, nic_fmt, port,
7359 is_eng_idle ? "Y" : "N",
7360 qm_glbl_sts0, qm_cgm_sts);
7361 }
7362
7363 port = 2 * i + 1;
7364 if (hdev->nic_ports_mask & BIT(port)) {
7365 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
7366 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
7367 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7368 is_idle &= is_eng_idle;
7369
7370 if (mask)
7371 *mask |= ((u64) !is_eng_idle) <<
7372 (GAUDI_ENGINE_ID_NIC_0 + port);
7373 if (s)
7374 seq_printf(s, nic_fmt, port,
7375 is_eng_idle ? "Y" : "N",
7376 qm_glbl_sts0, qm_cgm_sts);
7377 }
7378 }
7379
7380 if (s)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007381 seq_puts(s, "\n");
7382
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007383 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007384
7385 mutex_unlock(&gaudi->clk_gate_mutex);
7386
7387 return is_idle;
7388}
7389
7390static void gaudi_hw_queues_lock(struct hl_device *hdev)
7391 __acquires(&gaudi->hw_queues_lock)
7392{
7393 struct gaudi_device *gaudi = hdev->asic_specific;
7394
7395 spin_lock(&gaudi->hw_queues_lock);
7396}
7397
7398static void gaudi_hw_queues_unlock(struct hl_device *hdev)
7399 __releases(&gaudi->hw_queues_lock)
7400{
7401 struct gaudi_device *gaudi = hdev->asic_specific;
7402
7403 spin_unlock(&gaudi->hw_queues_lock);
7404}
7405
7406static u32 gaudi_get_pci_id(struct hl_device *hdev)
7407{
7408 return hdev->pdev->device;
7409}
7410
7411static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
7412 size_t max_size)
7413{
7414 struct gaudi_device *gaudi = hdev->asic_specific;
7415
7416 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7417 return 0;
7418
7419 return hl_fw_get_eeprom_data(hdev, data, max_size);
7420}
7421
7422/*
7423 * this function should be used only during initialization and/or after reset,
7424 * when there are no active users.
7425 */
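/* The sequence below programs the kernel address as both the kernel base and
 * the icache base, sets a valid LUT pointer, points the QM sync object at
 * SOB 0, triggers an icache invalidate + 64KB prefetch, waits for the vector
 * pipe to drain, issues TPC_EXECUTE and finally waits for the WQ inflight
 * counter to reach zero. Clock gating is disabled for the duration.
 */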
7426static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
7427 u32 tpc_id)
7428{
7429 struct gaudi_device *gaudi = hdev->asic_specific;
7430 u64 kernel_timeout;
7431 u32 status, offset;
7432 int rc;
7433
7434 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
7435
7436 if (hdev->pldm)
7437 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
7438 else
7439 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
7440
7441 mutex_lock(&gaudi->clk_gate_mutex);
7442
7443 hdev->asic_funcs->disable_clock_gating(hdev);
7444
7445 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
7446 lower_32_bits(tpc_kernel));
7447 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
7448 upper_32_bits(tpc_kernel));
7449
7450 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
7451 lower_32_bits(tpc_kernel));
7452 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
7453 upper_32_bits(tpc_kernel));
7454 /* set a valid LUT pointer, content is of no significance */
7455 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
7456 lower_32_bits(tpc_kernel));
7457 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
7458 upper_32_bits(tpc_kernel));
7459
7460 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
7461 lower_32_bits(CFG_BASE +
7462 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
7463
7464 WREG32(mmTPC0_CFG_TPC_CMD + offset,
7465 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
7466 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
7467 /* wait a bit for the engine to start executing */
7468 usleep_range(1000, 1500);
7469
7470 /* wait until engine has finished executing */
7471 rc = hl_poll_timeout(
7472 hdev,
7473 mmTPC0_CFG_STATUS + offset,
7474 status,
7475 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7476 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7477 1000,
7478 kernel_timeout);
7479
7480 if (rc) {
7481 dev_err(hdev->dev,
7482 "Timeout while waiting for TPC%d icache prefetch\n",
7483 tpc_id);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007484 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007485 mutex_unlock(&gaudi->clk_gate_mutex);
7486 return -EIO;
7487 }
7488
7489 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
7490 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
7491
7492 /* wait a bit for the engine to start executing */
7493 usleep_range(1000, 1500);
7494
7495 /* wait until engine has finished executing */
7496 rc = hl_poll_timeout(
7497 hdev,
7498 mmTPC0_CFG_STATUS + offset,
7499 status,
7500 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7501 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7502 1000,
7503 kernel_timeout);
7504
Oded Gabbay31ac1f12020-08-12 11:28:13 +03007505 if (rc) {
7506 dev_err(hdev->dev,
7507 "Timeout while waiting for TPC%d vector pipe\n",
7508 tpc_id);
7509 hdev->asic_funcs->set_clock_gating(hdev);
7510 mutex_unlock(&gaudi->clk_gate_mutex);
7511 return -EIO;
7512 }
7513
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007514 rc = hl_poll_timeout(
7515 hdev,
7516 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
7517 status,
7518 (status == 0),
7519 1000,
7520 kernel_timeout);
7521
Oded Gabbaye38bfd32020-07-03 20:46:12 +03007522 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007523 mutex_unlock(&gaudi->clk_gate_mutex);
7524
7525 if (rc) {
7526 dev_err(hdev->dev,
7527 "Timeout while waiting for TPC%d kernel to execute\n",
7528 tpc_id);
7529 return -EIO;
7530 }
7531
7532 return 0;
7533}
7534
7535static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
7536{
7537 return RREG32(mmHW_STATE);
7538}
7539
Ofir Bitton5de406c2020-09-10 10:56:26 +03007540static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
7541 struct hl_ctx *ctx)
7542{
7543 struct gaudi_device *gaudi = hdev->asic_specific;
7544 bool flush_pte;
7545 u64 va, pa;
7546 s64 off;
7547 int min_alloc_order, rc, collective_cb_size;
7548
7549 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7550 return 0;
7551
7552 hdev->internal_cb_pool_virt_addr =
7553 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
7554 HOST_SPACE_INTERNAL_CB_SZ,
7555 &hdev->internal_cb_pool_dma_addr,
7556 GFP_KERNEL | __GFP_ZERO);
7557
7558 if (!hdev->internal_cb_pool_virt_addr)
7559 return -ENOMEM;
7560
7561 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
7562 sizeof(struct packet_fence);
7563 min_alloc_order = ilog2(collective_cb_size);
7564
7565 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
7566 if (!hdev->internal_cb_pool) {
7567 dev_err(hdev->dev,
7568 "Failed to create internal CB pool\n");
7569 rc = -ENOMEM;
7570 goto free_internal_cb_pool;
7571 }
7572
7573 rc = gen_pool_add(hdev->internal_cb_pool,
7574 (uintptr_t) hdev->internal_cb_pool_virt_addr,
7575 HOST_SPACE_INTERNAL_CB_SZ, -1);
7576 if (rc) {
7577 dev_err(hdev->dev,
7578 "Failed to add memory to internal CB pool\n");
7579 rc = -EFAULT;
7580 goto destroy_internal_cb_pool;
7581 }
7582
7583 hdev->internal_cb_va_base = VA_HOST_SPACE_INTERNAL_CB_START;
7584
7585 mutex_lock(&ctx->mmu_lock);
7586
7587	/* The mapping is done page by page since we can't ensure the allocated ptr
7588 * is aligned to HOST_SPACE_INTERNAL_CB_SZ
7589 */
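	/* flush_pte is set only for the last 4KB page, so the PTE flush done by
	 * hl_mmu_map() is paid once for the whole range rather than per page;
	 * the unmap paths below follow the same pattern.
	 */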
7590 for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
7591 va = VA_HOST_SPACE_INTERNAL_CB_START + off;
7592 pa = hdev->internal_cb_pool_dma_addr + off;
7593 flush_pte = (off + PAGE_SIZE_4KB) >= HOST_SPACE_INTERNAL_CB_SZ;
7594 rc = hl_mmu_map(ctx, va, pa, PAGE_SIZE_4KB, flush_pte);
7595 if (rc) {
7596 dev_err(hdev->dev,
7597 "Map failed for va 0x%llx to pa 0x%llx\n",
7598 va, pa);
7599 goto unmap;
7600 }
7601 }
7602
7603 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
7604
7605 mutex_unlock(&ctx->mmu_lock);
7606
7607 return 0;
7608
7609unmap:
7610 for (; off >= 0 ; off -= PAGE_SIZE_4KB) {
7611 va = VA_HOST_SPACE_INTERNAL_CB_START + off;
7612 flush_pte = (off - (s32) PAGE_SIZE_4KB) < 0;
7613 if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
7614 dev_warn_ratelimited(hdev->dev,
7615 "failed to unmap va 0x%llx\n", va);
7616 }
7617
7618 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
7619
7620 mutex_unlock(&ctx->mmu_lock);
7621
7622destroy_internal_cb_pool:
7623 gen_pool_destroy(hdev->internal_cb_pool);
7624
7625free_internal_cb_pool:
7626 hdev->asic_funcs->asic_dma_free_coherent(hdev,
7627 HOST_SPACE_INTERNAL_CB_SZ,
7628 hdev->internal_cb_pool_virt_addr,
7629 hdev->internal_cb_pool_dma_addr);
7630
7631 return rc;
7632}
7633
7634static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
7635 struct hl_ctx *ctx)
7636{
7637 struct gaudi_device *gaudi = hdev->asic_specific;
7638 bool flush_pte = false;
7639 u64 va, off;
7640
7641 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7642 return;
7643
7644 mutex_lock(&ctx->mmu_lock);
7645
7646 for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
7647 va = VA_HOST_SPACE_INTERNAL_CB_START + off;
7648
7649 if (off + PAGE_SIZE_4KB >= HOST_SPACE_INTERNAL_CB_SZ)
7650 flush_pte = true;
7651
7652 if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
7653 dev_warn_ratelimited(hdev->dev,
7654 "failed to unmap va 0x%llx\n", va);
7655 }
7656
7657 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
7658
7659 mutex_unlock(&ctx->mmu_lock);
7660
7661 gen_pool_destroy(hdev->internal_cb_pool);
7662
7663 hdev->asic_funcs->asic_dma_free_coherent(hdev,
7664 HOST_SPACE_INTERNAL_CB_SZ,
7665 hdev->internal_cb_pool_virt_addr,
7666 hdev->internal_cb_pool_dma_addr);
7667}
7668
kernel test robotbb34bf72020-07-29 08:03:13 +08007669static int gaudi_ctx_init(struct hl_ctx *ctx)
Ofir Bittona04b7cd2020-07-13 13:36:55 +03007670{
Ofir Bitton20b75252020-09-30 15:51:10 +03007671 gaudi_mmu_prepare(ctx->hdev, ctx->asid);
Ofir Bitton5de406c2020-09-10 10:56:26 +03007672 return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
7673}
Ofir Bitton20b75252020-09-30 15:51:10 +03007674
Ofir Bitton5de406c2020-09-10 10:56:26 +03007675void gaudi_ctx_fini(struct hl_ctx *ctx)
7676{
7677 struct hl_device *hdev = ctx->hdev;
7678
7679	/* Gaudi will NEVER support more than a single compute context.
7680 * Therefore, don't clear anything unless it is the compute context
7681 */
7682 if (hdev->compute_ctx != ctx)
7683 return;
7684
7685 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
Ofir Bittona04b7cd2020-07-13 13:36:55 +03007686}
7687
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007688static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
7689{
7690 return gaudi_cq_assignment[cq_idx];
7691}
7692
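/* Signal/wait CB layout: a signal CB is a single MSG_SHORT packet that adds 1
 * to the target SOB, while a wait CB is built from MSG_SHORT packets that
 * configure and arm a monitor on the SOB group plus a FENCE packet that waits
 * for the monitor to fire. Both sizes also account for two trailing MSG_PROT
 * packets, which are presumably filled by the common queue code rather than
 * by the gen_* helpers below.
 */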
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007693static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
7694{
7695 return sizeof(struct packet_msg_short) +
7696 sizeof(struct packet_msg_prot) * 2;
7697}
7698
7699static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
7700{
7701 return sizeof(struct packet_msg_short) * 4 +
7702 sizeof(struct packet_fence) +
7703 sizeof(struct packet_msg_prot) * 2;
7704}
7705
Ofir Bitton2992c1d2020-09-10 09:40:35 +03007706static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
Ofir Bitton5de406c2020-09-10 10:56:26 +03007707 u32 size)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007708{
7709 struct hl_cb *cb = (struct hl_cb *) data;
7710 struct packet_msg_short *pkt;
Ofir Bitton2992c1d2020-09-10 09:40:35 +03007711 u32 value, ctl, pkt_size = sizeof(*pkt);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007712
Ofir Bitton2992c1d2020-09-10 09:40:35 +03007713 pkt = cb->kernel_address + size;
7714 memset(pkt, 0, pkt_size);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007715
Ofir Bitton6c07bab2020-06-01 10:38:46 +03007716 /* Inc by 1, Mode ADD */
7717 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
7718 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007719
Ofir Bitton6c07bab2020-06-01 10:38:46 +03007720 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
7721 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
7722 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
7723 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
7724 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
7725 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
7726 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007727
7728 pkt->value = cpu_to_le32(value);
7729 pkt->ctl = cpu_to_le32(ctl);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03007730
7731 return size + pkt_size;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007732}
7733
7734static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
7735 u16 addr)
7736{
7737 u32 ctl, pkt_size = sizeof(*pkt);
7738
7739 memset(pkt, 0, pkt_size);
7740
Ofir Bitton6c07bab2020-06-01 10:38:46 +03007741 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
7742 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
7743 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
7744 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
7745 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
7746 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007747
7748 pkt->value = cpu_to_le32(value);
7749 pkt->ctl = cpu_to_le32(ctl);
7750
7751 return pkt_size;
7752}
7753
Ofir Bitton2992c1d2020-09-10 09:40:35 +03007754static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
7755 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
7756 u16 sob_val, u16 mon_id)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007757{
Ofir Bitton2992c1d2020-09-10 09:40:35 +03007758 u64 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007759 u32 ctl, value, pkt_size = sizeof(*pkt);
Ofir Bitton2992c1d2020-09-10 09:40:35 +03007760 u16 msg_addr_offset;
7761 u8 mask;
7762
7763 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
7764 dev_err(hdev->dev,
7765 "sob_base %u (mask %#x) is not valid\n",
7766 sob_base, sob_mask);
7767 return 0;
7768 }
7769
7770 /*
7771 * monitor_base should be the content of the base0 address registers,
7772 * so it will be added to the msg short offsets
7773 */
7774 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
7775
7776 msg_addr_offset =
7777 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
7778 monitor_base;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007779
7780 memset(pkt, 0, pkt_size);
7781
Ofir Bitton2992c1d2020-09-10 09:40:35 +03007782 /* Monitor config packet: bind the monitor to a sync object */
7783 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
Ofir Bitton6c07bab2020-06-01 10:38:46 +03007784 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
7785 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
7786 0); /* GREATER OR EQUAL*/
7787 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007788
Ofir Bitton2992c1d2020-09-10 09:40:35 +03007789 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
Ofir Bitton6c07bab2020-06-01 10:38:46 +03007790 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
7791 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
7792 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
7793 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
7794 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
7795 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007796
7797 pkt->value = cpu_to_le32(value);
7798 pkt->ctl = cpu_to_le32(ctl);
7799
7800 return pkt_size;
7801}
7802
7803static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
7804{
7805 u32 ctl, cfg, pkt_size = sizeof(*pkt);
7806
7807 memset(pkt, 0, pkt_size);
7808
Ofir Bitton6c07bab2020-06-01 10:38:46 +03007809 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
7810 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
7811 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007812
Ofir Bitton6c07bab2020-06-01 10:38:46 +03007813 ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
7814 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
7815 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
7816 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03007817
7818 pkt->cfg = cpu_to_le32(cfg);
7819 pkt->ctl = cpu_to_le32(ctl);
7820
7821 return pkt_size;
7822}
7823
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}

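/*
 * Program a sync manager monitor via three short message packets: the low
 * and high halves of the address to write when the monitor triggers (the
 * fence register of the waiting queue), followed by the payload value (1)
 * that will be written to it.
 */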
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

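/*
 * Generate a wait CB: monitor configuration packets, a packet that arms the
 * monitor on the signal SOB(s) (gaudi_add_arm_monitor_pkt()), and a final
 * fence packet on which the queue blocks until the monitor fires and writes
 * the fence register.
 */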
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

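/* Clear a sync object in the sync manager and re-initialize its refcount. */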
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
		0);

	kref_init(&hw_sob->kref);
}

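/*
 * The DMA mask is taken from what the firmware left in the PSOC non-reset
 * scratch register: the POWER9 magic indicates the host supports full 64-bit
 * DMA addressing; otherwise a 48-bit mask is used.
 */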
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
					HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

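/*
 * Read the device timestamp counter, composing the 64-bit value from its
 * upper (CNTCVU) and lower (CNTCVL) 32-bit halves.
 */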
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

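/* GAUDI ASIC-specific callbacks exposed to the habanalabs common code. */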
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.get_hw_state = gaudi_get_hw_state,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}