// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/genalloc.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1,5 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse the CB, but WREG should be
 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
 *                      never secured.
 *
 * When the driver needs to use DMA, it checks that Gaudi is idle, sets DMA
 * channel 0 to secured, executes the DMA and changes it back to non-secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug).
 * CQ, CP and the engine are not secured.
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	1000		/* 1000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
	"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
	"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
	"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
	"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
	[8] = GAUDI_QUEUE_ID_DMA_5_0,
	[9] = GAUDI_QUEUE_ID_DMA_5_1,
	[10] = GAUDI_QUEUE_ID_DMA_5_2,
	[11] = GAUDI_QUEUE_ID_DMA_5_3
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
				struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
				u32 size, u64 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);

static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 1;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
		}
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->sync_stream_first_sob = 0;
	prop->sync_stream_first_mon = 0;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->max_power_default = MAX_POWER_DEFAULT_PCI;

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			num_sync_stream_queues * HL_RSVD_SOBS;
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			num_sync_stream_queues * HL_RSVD_MONS;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = gaudi_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	return 0;

free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 trace_freq = 0;
	u32 pll_clk = 0;
	u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
	u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
	u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
	u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
	u32 od = RREG32(mmPSOC_CPU_PLL_OD);

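	/*
	 * The trace frequency is either the PLL reference clock or the PLL
	 * output, ref_clk * (NF + 1) / ((NR + 1) * (OD + 1)); in the
	 * "divided" select modes it is further divided by (div_fctr + 1).
	 */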
	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
		if (div_sel == DIV_SEL_REF_CLK)
			trace_freq = PLL_REF_CLK;
		else
			trace_freq = PLL_REF_CLK / (div_fctr + 1);
	} else if (div_sel == DIV_SEL_PLL_CLK ||
			div_sel == DIV_SEL_DIVIDED_PLL) {
		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
		if (div_sel == DIV_SEL_PLL_CLK)
			trace_freq = pll_clk;
		else
			trace_freq = pll_clk / (div_fctr + 1);
	} else {
		dev_warn(hdev->dev,
			"Received invalid div select value: %d", div_sel);
	}

	prop->psoc_timestamp_frequency = trace_freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
					cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

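	/*
	 * Build a single LIN_DMA packet that copies the TPC kernel image from
	 * the host DMA buffer to the user area of the on-chip SRAM.
	 */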
	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	cb->cs_cnt--;

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc;

	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc) {
		dev_err(hdev->dev, "Firmware file %s is not found!\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);

	gaudi_fetch_psoc_frequency(hdev);

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, and bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						&dma_addr_arr[i],
						GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
						q->pq_kernel_addr,
						q->pq_dma_addr);
	}
}

static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
		case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);
	mutex_init(&gaudi->clk_gate_mutex);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
				hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
				hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	mutex_destroy(&gaudi->clk_gate_mutex);

	kfree(gaudi);

	return 0;
}

static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_HANDLED;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}

static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_info(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}

static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = gaudi_pci_irq_vector(hdev, i, false);
		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
			&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(gaudi_pci_irq_vector(hdev, i, false),
				&hdev->completion_queue[i]);
	return rc;
}

static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
			PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

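	/*
	 * If fewer vectors than required were granted, fall back to a single
	 * MSI that serves all completion queues and the event queue.
	 */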
	if (rc < NUMBER_OF_INTERRUPTS) {
		gaudi->multi_msi_mode = false;
		rc = gaudi_enable_msi_single(hdev);
	} else {
		gaudi->multi_msi_mode = true;
		rc = gaudi_enable_msi_multi(hdev);
	}

	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	if (gaudi->multi_msi_mode) {
		for (i = 0 ; i < cq_cnt ; i++)
			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));

		synchronize_irq(gaudi_pci_irq_vector(hdev,
					GAUDI_EVENT_QUEUE_MSI_IDX,
					true));
	} else {
		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
	}
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);

	if (gaudi->multi_msi_mode) {
		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
				true);
		free_irq(irq, &hdev->event_queue);

		for (i = 0 ; i < cq_cnt ; i++) {
			irq = gaudi_pci_irq_vector(hdev, i, false);
			free_irq(irq, &hdev->completion_queue[i]);
		}
	} else {
		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	}

	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}

static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	if (!hdev->sram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}

static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	if (!hdev->dram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}

static void gaudi_init_e2e(struct hl_device *hdev)
{
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	if (!hdev->dram_scrambler_enable) {
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
	}

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1662 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1663
1664 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1665 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1666 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1667 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1668
1669 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1670 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1671 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1672 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1673
1674 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1675 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1676 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1677 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1678
1679 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1680 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1681 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1682 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1683
1684 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1685 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1686 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1687 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1688
1689 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1690 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1691 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1692 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1693
1694 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1695 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1696 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1697 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1698
1699 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1700 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1701 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1702 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1703
1704 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1705 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1706 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1707 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1708
1709 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1710 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1711 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1712 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1713
1714 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1715 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1716 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1717 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1718
1719 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1720 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1721 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1722 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1723
1724 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1725 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1726 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1727 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1728
1729 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1730 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1731 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1732 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1733
1734 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1735 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1736 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1737 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1738}
1739
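/*
 * Program the HBM read/write credit counts for each of the four DMA_IF
 * blocks (E_N, E_S, W_N and W_S) and then turn on read and write credits
 * via the two HBM_CRED_EN registers of each block.
 */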
1740static void gaudi_init_hbm_cred(struct hl_device *hdev)
1741{
1742 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1743
1744 hbm0_wr = 0x33333333;
1745 hbm0_rd = 0x77777777;
1746 hbm1_wr = 0x55555555;
1747 hbm1_rd = 0xDDDDDDDD;
1748
1749 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1750 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1751 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1752 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1753
1754 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1755 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1756 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1757 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1758
1759 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1760 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1761 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1762 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1763
1764 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1765 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1766 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1767 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1768
1769 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1770 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1771 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1772 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1773 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1774 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1775 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1776 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1777 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1778 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1779 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1780 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1781
1782 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1783 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1786 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1787 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1788 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1789 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1790 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1791 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1792 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1793 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1794}
1795
1796static void gaudi_init_golden_registers(struct hl_device *hdev)
1797{
1798 u32 tpc_offset;
1799 int tpc_id, i;
1800
1801 gaudi_init_e2e(hdev);
1802
1803 gaudi_init_hbm_cred(hdev);
1804
1805 hdev->asic_funcs->disable_clock_gating(hdev);
1806
1807 for (tpc_id = 0, tpc_offset = 0;
1808 tpc_id < TPC_NUMBER_OF_ENGINES;
1809 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1810 /* Mask all arithmetic interrupts from TPC */
1811 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1812 /* Set 16 cache lines */
1813 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1814 ICACHE_FETCH_LINE_NUM, 2);
1815 }
1816
1817 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1818 for (i = 0 ; i < 128 ; i += 8)
1819 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1820
1821 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1822 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1823 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1824 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1825}
1826
1827static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1828 int qman_id, dma_addr_t qman_pq_addr)
1829{
1830 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1831 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1832 u32 q_off, dma_qm_offset;
1833 u32 dma_qm_err_cfg;
1834
1835 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1836
1837 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1838 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1839 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1840 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1841 so_base_en_lo = lower_32_bits(CFG_BASE +
1842 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1843 so_base_en_hi = upper_32_bits(CFG_BASE +
1844 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1845 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1846 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1847 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1848 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1849 so_base_ws_lo = lower_32_bits(CFG_BASE +
1850 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1851 so_base_ws_hi = upper_32_bits(CFG_BASE +
1852 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1853
1854 q_off = dma_qm_offset + qman_id * 4;
1855
1856 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1857 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1858
1859 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1860 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1861 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1862
1863 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1864 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1865 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1866
1867 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1868 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1869 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1870 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1871 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1872 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1873 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1874 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1875
1876 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1877
1878 /* The following configuration is needed only once per QMAN */
1879 if (qman_id == 0) {
1880 /* Configure RAZWI IRQ */
1881 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1882 if (hdev->stop_on_err) {
1883 dma_qm_err_cfg |=
1884 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1885 }
1886
1887 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1888 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1889 lower_32_bits(CFG_BASE +
1890 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1891 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1892 upper_32_bits(CFG_BASE +
1893 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1894 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1895 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1896 dma_id);
1897
1898 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1899 QM_ARB_ERR_MSG_EN_MASK);
1900
1901 /* Increase ARB WDT to support streams architecture */
1902 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1903 GAUDI_ARB_WDT_TIMEOUT);
1904
1905 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1906 QMAN_EXTERNAL_MAKE_TRUSTED);
1907
1908 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1909 }
1910}
1911
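/*
 * Per-channel DMA core setup: leave the read outstanding/size limits at
 * their maximum (a value of 0 means maximum here), route RAZWI error
 * messages to the GIC, keep the secured channel in MMU bypass mode and
 * finally enable the core.
 */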
1912static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1913{
1914 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1915 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1916
1917 /* Set to maximum possible according to physical size */
1918 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1919 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1920
1921 /* STOP_ON bit implies no completion to operation in case of RAZWI */
1922 if (hdev->stop_on_err)
1923 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1924
1925 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1926 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1927 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1928 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1929 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1930 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1931 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1932 WREG32(mmDMA0_CORE_PROT + dma_offset,
1933 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1934 /* If the channel is secured, it should be in MMU bypass mode */
1935 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1936 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1937 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1938}
1939
1940static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1941 u32 enable_mask)
1942{
1943 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1944
1945 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1946}
1947
1948static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1949{
1950 struct gaudi_device *gaudi = hdev->asic_specific;
1951 struct hl_hw_queue *q;
1952 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1953
1954 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1955 return;
1956
1957 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1958 dma_id = gaudi_dma_assignment[i];
1959 /*
1960 * For queues after the CPU Q, we need to add 1 to get the correct
1961 * queue index. In addition, we need to add the CPU EQ and the NIC
1962 * IRQs in order to get the correct MSI register.
1963 */
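 /* e.g. if gaudi_dma_assignment[] maps this entry to DMA channel 5,
  * stream 0 ends up on kernel queue index 4 * 5 + 0 + 1 = 21, and its
  * MSI vector is pushed up by one CPU EQ slot plus the NIC IRQ slots.
  */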
1964 if (dma_id > 1) {
1965 cpu_skip = 1;
1966 nic_skip = NIC_NUMBER_OF_ENGINES;
1967 } else {
1968 cpu_skip = 0;
1969 nic_skip = 0;
1970 }
1971
1972 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1973 q_idx = 4 * dma_id + j + cpu_skip;
1974 q = &hdev->kernel_queues[q_idx];
1975 q->cq_id = cq_id++;
1976 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1977 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1978 q->bus_address);
1979 }
1980
1981 gaudi_init_dma_core(hdev, dma_id);
1982
1983 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1984 }
1985
1986 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1987}
1988
1989static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1990 int qman_id, u64 qman_base_addr)
1991{
1992 u32 mtr_base_lo, mtr_base_hi;
1993 u32 so_base_lo, so_base_hi;
1994 u32 q_off, dma_qm_offset;
1995 u32 dma_qm_err_cfg;
1996
1997 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1998
1999 mtr_base_lo = lower_32_bits(CFG_BASE +
2000 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2001 mtr_base_hi = upper_32_bits(CFG_BASE +
2002 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2003 so_base_lo = lower_32_bits(CFG_BASE +
2004 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2005 so_base_hi = upper_32_bits(CFG_BASE +
2006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2007
2008 q_off = dma_qm_offset + qman_id * 4;
2009
2010 if (qman_id < 4) {
2011 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2012 lower_32_bits(qman_base_addr));
2013 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2014 upper_32_bits(qman_base_addr));
2015
2016 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2017 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2018 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2019
2020 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2021 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2022 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2023 } else {
2024 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2025 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2026 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2027
2028 /* Configure RAZWI IRQ */
2029 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2030 if (hdev->stop_on_err) {
2031 dma_qm_err_cfg |=
2032 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2033 }
2034 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2035
2036 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2037 lower_32_bits(CFG_BASE +
2038 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2039 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2040 upper_32_bits(CFG_BASE +
2041 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2042 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2043 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2044 dma_id);
2045
2046 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2047 QM_ARB_ERR_MSG_EN_MASK);
2048
2049 /* Increase ARB WDT to support streams architecture */
2050 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2051 GAUDI_ARB_WDT_TIMEOUT);
2052
2053 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2054 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2055 QMAN_INTERNAL_MAKE_TRUSTED);
2056 }
2057
2058 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2059 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2060 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2061 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2062}
2063
2064static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2065{
2066 struct gaudi_device *gaudi = hdev->asic_specific;
2067 struct gaudi_internal_qman_info *q;
2068 u64 qman_base_addr;
2069 int i, j, dma_id, internal_q_index;
2070
2071 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2072 return;
2073
2074 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2075 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2076
2077 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2078 /*
2079 * Add the CPU queue in order to get the correct queue
2080 * number, as all internal queues are placed after it
2081 */
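 /* e.g. with QMAN_STREAMS == 4, DMA channel 2 stream 0 lands on
  * internal queue index 2 * 4 + 0 + 1 = 9
  */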
2082 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2083
2084 q = &gaudi->internal_qmans[internal_q_index];
2085 qman_base_addr = (u64) q->pq_dma_addr;
2086 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2087 qman_base_addr);
2088 }
2089
2090 /* Initializing lower CP for HBM DMA QMAN */
2091 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2092
2093 gaudi_init_dma_core(hdev, dma_id);
2094
2095 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2096 }
2097
2098 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2099}
2100
2101static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2102 int qman_id, u64 qman_base_addr)
2103{
2104 u32 mtr_base_lo, mtr_base_hi;
2105 u32 so_base_lo, so_base_hi;
2106 u32 q_off, mme_id;
2107 u32 mme_qm_err_cfg;
2108
2109 mtr_base_lo = lower_32_bits(CFG_BASE +
2110 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2111 mtr_base_hi = upper_32_bits(CFG_BASE +
2112 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2113 so_base_lo = lower_32_bits(CFG_BASE +
2114 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2115 so_base_hi = upper_32_bits(CFG_BASE +
2116 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2117
2118 q_off = mme_offset + qman_id * 4;
2119
2120 if (qman_id < 4) {
2121 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2122 lower_32_bits(qman_base_addr));
2123 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2124 upper_32_bits(qman_base_addr));
2125
2126 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2127 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2128 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2129
2130 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2131 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2132 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2133 } else {
2134 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2135 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2136 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2137
2138 /* Configure RAZWI IRQ */
2139 mme_id = mme_offset /
2140 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2141
2142 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2143 if (hdev->stop_on_err) {
2144 mme_qm_err_cfg |=
2145 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2146 }
2147 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2148 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2149 lower_32_bits(CFG_BASE +
2150 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2151 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2152 upper_32_bits(CFG_BASE +
2153 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2154 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2155 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2156 mme_id);
2157
2158 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2159 QM_ARB_ERR_MSG_EN_MASK);
2160
2161 /* Increase ARB WDT to support streams architecture */
2162 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2163 GAUDI_ARB_WDT_TIMEOUT);
2164
2165 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2166 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2167 QMAN_INTERNAL_MAKE_TRUSTED);
2168 }
2169
2170 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2171 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2172 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2173 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2174}
2175
2176static void gaudi_init_mme_qmans(struct hl_device *hdev)
2177{
2178 struct gaudi_device *gaudi = hdev->asic_specific;
2179 struct gaudi_internal_qman_info *q;
2180 u64 qman_base_addr;
2181 u32 mme_offset;
2182 int i, internal_q_index;
2183
2184 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2185 return;
2186
2187 /*
2188 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2189 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2190 */
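 /* mme_offset therefore starts at the MME2 QMAN block and is switched
  * to 0 (the MME0 QMAN block) once the first four streams have been
  * configured, matching the "if (i == 3)" below.
  */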
2191
2192 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2193
2194 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2195 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2196 q = &gaudi->internal_qmans[internal_q_index];
2197 qman_base_addr = (u64) q->pq_dma_addr;
2198 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2199 qman_base_addr);
2200 if (i == 3)
2201 mme_offset = 0;
2202 }
2203
2204 /* Initializing lower CP for MME QMANs */
2205 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2206 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2207 gaudi_init_mme_qman(hdev, 0, 4, 0);
2208
2209 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2210 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2211
2212 gaudi->hw_cap_initialized |= HW_CAP_MME;
2213}
2214
2215static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2216 int qman_id, u64 qman_base_addr)
2217{
2218 u32 mtr_base_lo, mtr_base_hi;
2219 u32 so_base_lo, so_base_hi;
2220 u32 q_off, tpc_id;
2221 u32 tpc_qm_err_cfg;
2222
2223 mtr_base_lo = lower_32_bits(CFG_BASE +
2224 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2225 mtr_base_hi = upper_32_bits(CFG_BASE +
2226 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2227 so_base_lo = lower_32_bits(CFG_BASE +
2228 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2229 so_base_hi = upper_32_bits(CFG_BASE +
2230 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2231
2232 q_off = tpc_offset + qman_id * 4;
2233
2234 if (qman_id < 4) {
2235 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2236 lower_32_bits(qman_base_addr));
2237 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2238 upper_32_bits(qman_base_addr));
2239
2240 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2241 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2242 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2243
2244 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2245 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2246 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2247 } else {
2248 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2249 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2250 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2251
2252 /* Configure RAZWI IRQ */
2253 tpc_id = tpc_offset /
2254 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2255
2256 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2257 if (hdev->stop_on_err) {
2258 tpc_qm_err_cfg |=
2259 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2260 }
2261
2262 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2263 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2264 lower_32_bits(CFG_BASE +
2265 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2266 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2267 upper_32_bits(CFG_BASE +
2268 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2269 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2270 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2271 tpc_id);
2272
2273 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2274 QM_ARB_ERR_MSG_EN_MASK);
2275
2276 /* Increase ARB WDT to support streams architecture */
2277 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2278 GAUDI_ARB_WDT_TIMEOUT);
2279
2280 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2281 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2282 QMAN_INTERNAL_MAKE_TRUSTED);
2283 }
2284
2285 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2286 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2287 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2288 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2289}
2290
2291static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2292{
2293 struct gaudi_device *gaudi = hdev->asic_specific;
2294 struct gaudi_internal_qman_info *q;
2295 u64 qman_base_addr;
2296 u32 so_base_hi, tpc_offset = 0;
2297 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2298 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2299 int i, tpc_id, internal_q_index;
2300
2301 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2302 return;
2303
2304 so_base_hi = upper_32_bits(CFG_BASE +
2305 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2306
2307 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2308 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2309 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2310 tpc_id * QMAN_STREAMS + i;
2311 q = &gaudi->internal_qmans[internal_q_index];
2312 qman_base_addr = (u64) q->pq_dma_addr;
2313 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2314 qman_base_addr);
2315
2316 if (i == 3) {
2317 /* Initializing lower CP for TPC QMAN */
2318 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2319
2320 /* Enable the QMAN and TPC channel */
2321 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2322 QMAN_TPC_ENABLE);
2323 }
2324 }
2325
2326 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2327 so_base_hi);
2328
2329 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2330
2331 gaudi->hw_cap_initialized |=
2332 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2333 }
2334}
2335
2336static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2337{
2338 struct gaudi_device *gaudi = hdev->asic_specific;
2339
2340 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2341 return;
2342
2343 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2344 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2345 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2346}
2347
2348static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2349{
2350 struct gaudi_device *gaudi = hdev->asic_specific;
2351
2352 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2353 return;
2354
2355 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2356 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2357 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2358 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2359 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2360}
2361
2362static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2363{
2364 struct gaudi_device *gaudi = hdev->asic_specific;
2365
2366 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2367 return;
2368
2369 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2370 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2371}
2372
2373static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2374{
2375 struct gaudi_device *gaudi = hdev->asic_specific;
2376 u32 tpc_offset = 0;
2377 int tpc_id;
2378
2379 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2380 return;
2381
2382 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2383 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2384 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2385 }
2386}
2387
2388static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2389{
2390 struct gaudi_device *gaudi = hdev->asic_specific;
2391
2392 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2393 return;
2394
2395 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2396 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2397 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2398 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2399}
2400
2401static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2402{
2403 struct gaudi_device *gaudi = hdev->asic_specific;
2404
2405 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2406 return;
2407
2408 /* Stop CPs of HBM DMA QMANs */
2409
2410 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2411 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2412 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2413 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2414 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2415}
2416
2417static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2418{
2419 struct gaudi_device *gaudi = hdev->asic_specific;
2420
2421 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2422 return;
2423
2424 /* Stop CPs of MME QMANs */
2425 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2426 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2427}
2428
2429static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2430{
2431 struct gaudi_device *gaudi = hdev->asic_specific;
2432
2433 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2434 return;
2435
2436 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2437 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2438 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2439 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2440 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2441 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2442 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2443 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2444}
2445
2446static void gaudi_pci_dma_stall(struct hl_device *hdev)
2447{
2448 struct gaudi_device *gaudi = hdev->asic_specific;
2449
2450 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2451 return;
2452
2453 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2454 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2455 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2456}
2457
2458static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2459{
2460 struct gaudi_device *gaudi = hdev->asic_specific;
2461
2462 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2463 return;
2464
2465 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2466 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2467 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2468 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2469 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2470}
2471
2472static void gaudi_mme_stall(struct hl_device *hdev)
2473{
2474 struct gaudi_device *gaudi = hdev->asic_specific;
2475
2476 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2477 return;
2478
2479 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2480 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2481 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2482 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2483 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2484 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2485 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2486 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2487 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2488 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2489 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2490 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2491 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2492 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2493 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2494 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2495 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2496}
2497
2498static void gaudi_tpc_stall(struct hl_device *hdev)
2499{
2500 struct gaudi_device *gaudi = hdev->asic_specific;
2501
2502 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2503 return;
2504
2505 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2506 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2507 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2508 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2509 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2510 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2511 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2512 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2513}
2514
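/*
 * Enable or disable clock gating per engine according to
 * hdev->clock_gating_mask: an engine whose GAUDI_ENGINE_ID_* bit is set
 * gets its QMAN CGM registers programmed with the power-gate enable
 * values, all other engines get 0.
 */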
2515static void gaudi_set_clock_gating(struct hl_device *hdev)
2516{
2517 struct gaudi_device *gaudi = hdev->asic_specific;
2518 u32 qman_offset;
2519 bool enable;
2520 int i;
2521
2522 /* If we are in the middle of a debug session, don't enable clock
2523 * gating as it may interfere
2524 */
2525 if (hdev->in_debug)
2526 return;
2527
2528 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2529 enable = !!(hdev->clock_gating_mask &
2530 (BIT_ULL(gaudi_dma_assignment[i])));
2531
2532 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2533 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2534 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2535 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2536 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2537 }
2538
2539 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2540 enable = !!(hdev->clock_gating_mask &
2541 (BIT_ULL(gaudi_dma_assignment[i])));
2542
2543 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2544 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2545 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2546 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2547 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2548 }
2549
2550 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2551 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2552 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2553
2554 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2555 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2556 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2557
2558 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2559 enable = !!(hdev->clock_gating_mask &
2560 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2561
2562 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2563 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2564 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2565 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2566
2567 qman_offset += TPC_QMAN_OFFSET;
2568 }
2569
2570 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2571}
2572
2573static void gaudi_disable_clock_gating(struct hl_device *hdev)
2574{
2575 struct gaudi_device *gaudi = hdev->asic_specific;
2576 u32 qman_offset;
2577 int i;
2578
2579 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2580 return;
2581
2582 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2583 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2584 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2585
2586 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2587 }
2588
2589 WREG32(mmMME0_QM_CGM_CFG, 0);
2590 WREG32(mmMME0_QM_CGM_CFG1, 0);
2591 WREG32(mmMME2_QM_CGM_CFG, 0);
2592 WREG32(mmMME2_QM_CGM_CFG1, 0);
2593
2594 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2595 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2596 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2597
2598 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2599 }
2600
2601 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2602}
2603
2604static void gaudi_enable_timestamp(struct hl_device *hdev)
2605{
2606 /* Disable the timestamp counter */
2607 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2608
2609 /* Zero the lower/upper parts of the 64-bit counter */
2610 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2611 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2612
2613 /* Enable the counter */
2614 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2615}
2616
2617static void gaudi_disable_timestamp(struct hl_device *hdev)
2618{
2619 /* Disable the timestamp counter */
2620 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2621}
2622
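/*
 * Orderly engine shutdown: stop the QMAN command processors, disable
 * clock gating, stall the DMA/TPC/MME engines, then disable the QMANs,
 * the timestamp counter and MSI interrupts, with a settling wait between
 * the stop and stall phases.
 */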
2623static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2624{
2625 u32 wait_timeout_ms;
2626
2627 dev_info(hdev->dev,
2628 "Halting compute engines and disabling interrupts\n");
2629
2630 if (hdev->pldm)
2631 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2632 else
2633 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2634
2635
2636 gaudi_stop_mme_qmans(hdev);
2637 gaudi_stop_tpc_qmans(hdev);
2638 gaudi_stop_hbm_dma_qmans(hdev);
2639 gaudi_stop_pci_dma_qmans(hdev);
2640
2641 hdev->asic_funcs->disable_clock_gating(hdev);
2642
2643 msleep(wait_timeout_ms);
2644
2645 gaudi_pci_dma_stall(hdev);
2646 gaudi_hbm_dma_stall(hdev);
2647 gaudi_tpc_stall(hdev);
2648 gaudi_mme_stall(hdev);
2649
2650 msleep(wait_timeout_ms);
2651
2652 gaudi_disable_mme_qmans(hdev);
2653 gaudi_disable_tpc_qmans(hdev);
2654 gaudi_disable_hbm_dma_qmans(hdev);
2655 gaudi_disable_pci_dma_qmans(hdev);
2656
2657 gaudi_disable_timestamp(hdev);
2658
2659 gaudi_disable_msi(hdev);
2660}
2661
2662static int gaudi_mmu_init(struct hl_device *hdev)
2663{
2664 struct asic_fixed_properties *prop = &hdev->asic_prop;
2665 struct gaudi_device *gaudi = hdev->asic_specific;
2666 u64 hop0_addr;
2667 int rc, i;
2668
2669 if (!hdev->mmu_enable)
2670 return 0;
2671
2672 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2673 return 0;
2674
2675 hdev->dram_supports_virtual_memory = false;
2676
2677 for (i = 0 ; i < prop->max_asid ; i++) {
2678 hop0_addr = prop->mmu_pgt_addr +
2679 (i * prop->mmu_hop_table_size);
2680
2681 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2682 if (rc) {
2683 dev_err(hdev->dev,
2684 "failed to set hop0 addr for asid %d\n", i);
2685 goto err;
2686 }
2687 }
2688
2689 /* init MMU cache manage page */
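 /* (the base address is split across two registers: bits 39:8 and
  * bits 49:40 of MMU_CACHE_MNG_ADDR)
  */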
2690 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2691 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2692
2693 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2694
2695 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2696 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2697
2698 WREG32(mmSTLB_HOP_CONFIGURATION,
2699 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2700
2701 /*
2702 * The H/W expects the first PI after init to be 1. After wraparound
2703 * we'll write 0.
2704 */
2705 gaudi->mmu_cache_inv_pi = 1;
2706
2707 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2708
2709 return 0;
2710
2711err:
2712 return rc;
2713}
2714
2715static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2716{
2717 void __iomem *dst;
2718
2719 /* HBM scrambler must be initialized before pushing F/W to HBM */
2720 gaudi_init_scrambler_hbm(hdev);
2721
2722 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2723
2724 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2725}
2726
2727static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2728{
2729 void __iomem *dst;
2730
2731 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2732
2733 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2734}
2735
2736static void gaudi_read_device_fw_version(struct hl_device *hdev,
2737 enum hl_fw_component fwc)
2738{
2739 const char *name;
2740 u32 ver_off;
2741 char *dest;
2742
2743 switch (fwc) {
2744 case FW_COMP_UBOOT:
2745 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2746 dest = hdev->asic_prop.uboot_ver;
2747 name = "U-Boot";
2748 break;
2749 case FW_COMP_PREBOOT:
2750 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2751 dest = hdev->asic_prop.preboot_ver;
2752 name = "Preboot";
2753 break;
2754 default:
2755 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2756 return;
2757 }
2758
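 /* The version pointer read above is an absolute SRAM address; the
  * SRAM base is stripped so the offset can be used with the SRAM PCI
  * BAR mapping below.
  */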
2759 ver_off &= ~((u32)SRAM_BASE_ADDR);
2760
2761 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2762 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2763 VERSION_MAX_LEN);
2764 } else {
2765 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2766 name, ver_off);
2767 strcpy(dest, "unavailable");
2768 }
2769}
2770
2771static int gaudi_init_cpu(struct hl_device *hdev)
2772{
2773 struct gaudi_device *gaudi = hdev->asic_specific;
2774 int rc;
2775
2776 if (!hdev->cpu_enable)
2777 return 0;
2778
2779 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2780 return 0;
2781
2782 /*
2783 * The device CPU works with 40-bit addresses.
2784 * This register sets the extension to 50 bits.
2785 */
2786 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2787
2788 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2789 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2790 mmCPU_CMD_STATUS_TO_HOST,
2791 mmCPU_BOOT_ERR0,
2792 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2793 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2794
2795 if (rc)
2796 return rc;
2797
2798 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2799
2800 return 0;
2801}
2802
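/*
 * Hand the device CPU the addresses and sizes of the kernel's CPU PQ,
 * event queue and CPU-accessible DMA region, signal it through the GIC
 * and poll mmCPU_IF_QUEUE_INIT until it reports ready, or time out.
 */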
2803static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2804{
2805 struct gaudi_device *gaudi = hdev->asic_specific;
2806 struct hl_eq *eq;
2807 u32 status;
2808 struct hl_hw_queue *cpu_pq =
2809 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2810 int err;
2811
2812 if (!hdev->cpu_queues_enable)
2813 return 0;
2814
2815 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2816 return 0;
2817
2818 eq = &hdev->event_queue;
2819
2820 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2821 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2822
2823 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2824 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2825
2826 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2827 lower_32_bits(hdev->cpu_accessible_dma_address));
2828 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2829 upper_32_bits(hdev->cpu_accessible_dma_address));
2830
2831 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2832 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2833 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2834
2835 /* Used for EQ CI */
2836 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2837
2838 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2839
2840 if (gaudi->multi_msi_mode)
2841 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2842 else
2843 WREG32(mmCPU_IF_QUEUE_INIT,
2844 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2845
2846 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2847
2848 err = hl_poll_timeout(
2849 hdev,
2850 mmCPU_IF_QUEUE_INIT,
2851 status,
2852 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2853 1000,
2854 cpu_timeout);
2855
2856 if (err) {
2857 dev_err(hdev->dev,
2858 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
2859 return -EIO;
2860 }
2861
2862 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2863 return 0;
2864}
2865
2866static void gaudi_pre_hw_init(struct hl_device *hdev)
2867{
2868 /* Perform read from the device to make sure device is up */
2869 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2870
2871 /*
2872 * Let's mark in the H/W that we have reached this point. We check
2873 * this value in the reset_before_init function to understand whether
2874 * we need to reset the chip before doing H/W init. This register is
2875 * cleared by the H/W upon H/W reset
2876 */
2877 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2878
2879 /* Set the access through PCI bars (Linux driver only) as secured */
2880 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2881 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2882
2883 /* Perform read to flush the waiting writes to ensure configuration
2884 * was set in the device
2885 */
2886 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2887
2888 if (hdev->axi_drain) {
2889 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2890 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2891 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2892 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2893
2894 /* Perform read to flush the DRAIN cfg */
2895 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2896 } else {
2897 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2898 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2899
2900 /* Perform read to flush the DRAIN cfg */
2901 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2902 }
2903
2904 /* Configure the reset registers. Must be done as early as possible
2905 * in case we fail during H/W initialization
2906 */
2907 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2908 (CFG_RST_H_DMA_MASK |
2909 CFG_RST_H_MME_MASK |
2910 CFG_RST_H_SM_MASK |
2911 CFG_RST_H_TPC_7_MASK));
2912
2913 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2914
2915 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2916 (CFG_RST_H_HBM_MASK |
2917 CFG_RST_H_TPC_7_MASK |
2918 CFG_RST_H_NIC_MASK |
2919 CFG_RST_H_SM_MASK |
2920 CFG_RST_H_DMA_MASK |
2921 CFG_RST_H_MME_MASK |
2922 CFG_RST_H_CPU_MASK |
2923 CFG_RST_H_MMU_MASK));
2924
2925 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2926 (CFG_RST_L_IF_MASK |
2927 CFG_RST_L_PSOC_MASK |
2928 CFG_RST_L_TPC_MASK));
2929}
2930
2931static int gaudi_hw_init(struct hl_device *hdev)
2932{
2933 int rc;
2934
2935 dev_info(hdev->dev, "Starting initialization of H/W\n");
2936
2937 gaudi_pre_hw_init(hdev);
2938
2939 gaudi_init_pci_dma_qmans(hdev);
2940
2941 gaudi_init_hbm_dma_qmans(hdev);
2942
2943 rc = gaudi_init_cpu(hdev);
2944 if (rc) {
2945 dev_err(hdev->dev, "failed to initialize CPU\n");
2946 return rc;
2947 }
2948
2949 /* SRAM scrambler must be initialized after CPU is running from HBM */
2950 gaudi_init_scrambler_sram(hdev);
2951
2952 /* This is here just in case we are working without CPU */
2953 gaudi_init_scrambler_hbm(hdev);
2954
2955 gaudi_init_golden_registers(hdev);
2956
2957 rc = gaudi_mmu_init(hdev);
2958 if (rc)
2959 return rc;
2960
2961 gaudi_init_security(hdev);
2962
2963 gaudi_init_mme_qmans(hdev);
2964
2965 gaudi_init_tpc_qmans(hdev);
2966
2967 hdev->asic_funcs->set_clock_gating(hdev);
2968
2969 gaudi_enable_timestamp(hdev);
2970
2971 /* MSI must be enabled before CPU queues are initialized */
2972 rc = gaudi_enable_msi(hdev);
2973 if (rc)
2974 goto disable_queues;
2975
2976 /* must be called after MSI was enabled */
2977 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2978 if (rc) {
2979 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2980 rc);
2981 goto disable_msi;
2982 }
2983
2984 /* Perform read from the device to flush all configuration */
2985 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2986
2987 return 0;
2988
2989disable_msi:
2990 gaudi_disable_msi(hdev);
2991disable_queues:
2992 gaudi_disable_mme_qmans(hdev);
2993 gaudi_disable_pci_dma_qmans(hdev);
2994
2995 return rc;
2996}
2997
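/*
 * Hard-reset flow: let the H/W handle FLR, force the device CPU into WFE,
 * tell the ASIC not to re-initialize PCIe, apply the boot-strap bits
 * workaround, re-arm the boot sequencer, assert SW_ALL_RST and wait for
 * the reset to complete before restoring the strap value.
 */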
2998static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
2999{
3000 struct gaudi_device *gaudi = hdev->asic_specific;
3001 u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3002
3003 if (!hard_reset) {
3004 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3005 return;
3006 }
3007
3008 if (hdev->pldm) {
3009 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3010 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3011 } else {
3012 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3013 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3014 }
3015
3016 /* Set device to handle FLR by H/W as we will put the device CPU to
3017 * halt mode
3018 */
3019 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3020 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3021
3022 /* The state of the CPU is unknown, so make sure it is stopped
3023 * by any means necessary
3024 */
3025 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3026 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3027
3028 msleep(cpu_timeout_ms);
3029
3030 /* Tell ASIC not to re-initialize PCIe */
3031 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3032
3033 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3034
3035 /* H/W bug WA:
3036 * rdata[31:0] = strap_read_val;
3037 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3038 */
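 /* i.e. strap bits [30:21] are shifted up to [31:22] and a zero is
  * inserted at bit 21
  */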
3039 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3040 (boot_strap & 0x001FFFFF));
3041 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3042
3043 /* Restart BTL/BLR upon hard-reset */
3044 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3045
3046 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3047 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3048 dev_info(hdev->dev,
3049 "Issued HARD reset command, going to wait %dms\n",
3050 reset_timeout_ms);
3051
3052 /*
3053 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3054 * itself is in reset. We need to wait until the reset is deasserted
3055 */
3056 msleep(reset_timeout_ms);
3057
3058 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3059 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3060 dev_err(hdev->dev,
3061 "Timeout while waiting for device to reset 0x%x\n",
3062 status);
3063
3064 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3065
3066 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3067 HW_CAP_HBM | HW_CAP_PCI_DMA |
3068 HW_CAP_MME | HW_CAP_TPC_MASK |
3069 HW_CAP_HBM_DMA | HW_CAP_PLL |
3070 HW_CAP_MMU |
3071 HW_CAP_SRAM_SCRAMBLER |
3072 HW_CAP_HBM_SCRAMBLER |
3073 HW_CAP_CLK_GATE);
3074
3075 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3076}
3077
3078static int gaudi_suspend(struct hl_device *hdev)
3079{
3080 int rc;
3081
3082 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3083 if (rc)
3084 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3085
3086 return rc;
3087}
3088
3089static int gaudi_resume(struct hl_device *hdev)
3090{
3091 return gaudi_init_iatu(hdev);
3092}
3093
3094static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3095 void *cpu_addr, dma_addr_t dma_addr, size_t size)
3096{
3097 int rc;
3098
3099 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3100 VM_DONTCOPY | VM_NORESERVE;
3101
3102 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
3103 if (rc)
3104 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
3105
3106 return rc;
3107}
3108
3109static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3110{
3111 struct gaudi_device *gaudi = hdev->asic_specific;
3112 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3113 int dma_id;
3114 bool invalid_queue = false;
3115
3116 switch (hw_queue_id) {
3117 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3118 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3119 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3120 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3121 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3122 break;
3123
3124 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3125 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3126 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3127 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3128 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3129 break;
3130
3131 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3132 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3133 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3134 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3135 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3136 break;
3137
3138 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3139 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3140 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3142 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143 break;
3144
3145 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3146 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3147 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3149 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150 break;
3151
3152 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3153 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3154 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157 break;
3158
3159 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3160 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3161 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164 break;
3165
3166 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3167 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3168 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171 break;
3172
3173 case GAUDI_QUEUE_ID_CPU_PQ:
3174 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3175 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3176 else
3177 invalid_queue = true;
3178 break;
3179
3180 case GAUDI_QUEUE_ID_MME_0_0:
3181 db_reg_offset = mmMME2_QM_PQ_PI_0;
3182 break;
3183
3184 case GAUDI_QUEUE_ID_MME_0_1:
3185 db_reg_offset = mmMME2_QM_PQ_PI_1;
3186 break;
3187
3188 case GAUDI_QUEUE_ID_MME_0_2:
3189 db_reg_offset = mmMME2_QM_PQ_PI_2;
3190 break;
3191
3192 case GAUDI_QUEUE_ID_MME_0_3:
3193 db_reg_offset = mmMME2_QM_PQ_PI_3;
3194 break;
3195
3196 case GAUDI_QUEUE_ID_MME_1_0:
3197 db_reg_offset = mmMME0_QM_PQ_PI_0;
3198 break;
3199
3200 case GAUDI_QUEUE_ID_MME_1_1:
3201 db_reg_offset = mmMME0_QM_PQ_PI_1;
3202 break;
3203
3204 case GAUDI_QUEUE_ID_MME_1_2:
3205 db_reg_offset = mmMME0_QM_PQ_PI_2;
3206 break;
3207
3208 case GAUDI_QUEUE_ID_MME_1_3:
3209 db_reg_offset = mmMME0_QM_PQ_PI_3;
3210 break;
3211
3212 case GAUDI_QUEUE_ID_TPC_0_0:
3213 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3214 break;
3215
3216 case GAUDI_QUEUE_ID_TPC_0_1:
3217 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3218 break;
3219
3220 case GAUDI_QUEUE_ID_TPC_0_2:
3221 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3222 break;
3223
3224 case GAUDI_QUEUE_ID_TPC_0_3:
3225 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3226 break;
3227
3228 case GAUDI_QUEUE_ID_TPC_1_0:
3229 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3230 break;
3231
3232 case GAUDI_QUEUE_ID_TPC_1_1:
3233 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3234 break;
3235
3236 case GAUDI_QUEUE_ID_TPC_1_2:
3237 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3238 break;
3239
3240 case GAUDI_QUEUE_ID_TPC_1_3:
3241 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3242 break;
3243
3244 case GAUDI_QUEUE_ID_TPC_2_0:
3245 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3246 break;
3247
3248 case GAUDI_QUEUE_ID_TPC_2_1:
3249 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3250 break;
3251
3252 case GAUDI_QUEUE_ID_TPC_2_2:
3253 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3254 break;
3255
3256 case GAUDI_QUEUE_ID_TPC_2_3:
3257 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3258 break;
3259
3260 case GAUDI_QUEUE_ID_TPC_3_0:
3261 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3262 break;
3263
3264 case GAUDI_QUEUE_ID_TPC_3_1:
3265 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3266 break;
3267
3268 case GAUDI_QUEUE_ID_TPC_3_2:
3269 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3270 break;
3271
3272 case GAUDI_QUEUE_ID_TPC_3_3:
3273 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3274 break;
3275
3276 case GAUDI_QUEUE_ID_TPC_4_0:
3277 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3278 break;
3279
3280 case GAUDI_QUEUE_ID_TPC_4_1:
3281 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3282 break;
3283
3284 case GAUDI_QUEUE_ID_TPC_4_2:
3285 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3286 break;
3287
3288 case GAUDI_QUEUE_ID_TPC_4_3:
3289 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3290 break;
3291
3292 case GAUDI_QUEUE_ID_TPC_5_0:
3293 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3294 break;
3295
3296 case GAUDI_QUEUE_ID_TPC_5_1:
3297 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3298 break;
3299
3300 case GAUDI_QUEUE_ID_TPC_5_2:
3301 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3302 break;
3303
3304 case GAUDI_QUEUE_ID_TPC_5_3:
3305 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3306 break;
3307
3308 case GAUDI_QUEUE_ID_TPC_6_0:
3309 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3310 break;
3311
3312 case GAUDI_QUEUE_ID_TPC_6_1:
3313 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3314 break;
3315
3316 case GAUDI_QUEUE_ID_TPC_6_2:
3317 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3318 break;
3319
3320 case GAUDI_QUEUE_ID_TPC_6_3:
3321 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3322 break;
3323
3324 case GAUDI_QUEUE_ID_TPC_7_0:
3325 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3326 break;
3327
3328 case GAUDI_QUEUE_ID_TPC_7_1:
3329 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3330 break;
3331
3332 case GAUDI_QUEUE_ID_TPC_7_2:
3333 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3334 break;
3335
3336 case GAUDI_QUEUE_ID_TPC_7_3:
3337 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3338 break;
3339
3340 default:
3341 invalid_queue = true;
3342 }
3343
3344 if (invalid_queue) {
3345 /* Should never get here */
3346 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3347 hw_queue_id);
3348 return;
3349 }
3350
3351 db_value = pi;
3352
3353 /* ring the doorbell */
3354 WREG32(db_reg_offset, db_value);
3355
3356 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3357 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3358 GAUDI_EVENT_PI_UPDATE);
3359}
3360
3361static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3362 struct hl_bd *bd)
3363{
3364 __le64 *pbd = (__le64 *) bd;
3365
3366 /* The QMANs are on the host memory so a simple copy suffices */
3367 pqe[0] = pbd[0];
3368 pqe[1] = pbd[1];
3369}
3370
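/*
 * Coherent allocation wrapper: DMA addresses handed back to callers are
 * shifted by HOST_PHYS_BASE, the device's base physical address of host
 * memory, so they can be used directly in device descriptors.
 */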
3371static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3372 dma_addr_t *dma_handle, gfp_t flags)
3373{
3374 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3375 dma_handle, flags);
3376
3377 /* Shift to the device's base physical address of host memory */
3378 if (kernel_addr)
3379 *dma_handle += HOST_PHYS_BASE;
3380
3381 return kernel_addr;
3382}
3383
3384static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3385 void *cpu_addr, dma_addr_t dma_handle)
3386{
3387 /* Cancel the device's base physical address of host memory */
3388 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3389
3390 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3391}
3392
3393static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3394 u32 queue_id, dma_addr_t *dma_handle,
3395 u16 *queue_len)
3396{
3397 struct gaudi_device *gaudi = hdev->asic_specific;
3398 struct gaudi_internal_qman_info *q;
3399
3400 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3401 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3402 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3403 return NULL;
3404 }
3405
3406 q = &gaudi->internal_qmans[queue_id];
3407 *dma_handle = q->pq_dma_addr;
3408 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3409
3410 return q->pq_kernel_addr;
3411}
3412
3413static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3414 u16 len, u32 timeout, long *result)
3415{
3416 struct gaudi_device *gaudi = hdev->asic_specific;
3417
3418 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3419 if (result)
3420 *result = 0;
3421 return 0;
3422 }
3423
Oded Gabbay788cacf2020-07-07 17:30:13 +03003424 if (!timeout)
3425 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3426
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003427 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3428 timeout, result);
3429}
3430
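/*
 * Sanity-test an external H/W queue: place a MSG_PROT packet on the queue
 * that writes a known fence value to host memory, then poll that memory
 * until the value shows up or the timeout expires.
 */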
3431static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3432{
3433 struct packet_msg_prot *fence_pkt;
3434 dma_addr_t pkt_dma_addr;
3435 u32 fence_val, tmp, timeout_usec;
3436 dma_addr_t fence_dma_addr;
3437 u32 *fence_ptr;
3438 int rc;
3439
3440 if (hdev->pldm)
3441 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3442 else
3443 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3444
3445 fence_val = GAUDI_QMAN0_FENCE_VAL;
3446
3447 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3448 &fence_dma_addr);
3449 if (!fence_ptr) {
3450 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03003451 "Failed to allocate memory for H/W queue %d testing\n",
3452 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003453 return -ENOMEM;
3454 }
3455
3456 *fence_ptr = 0;
3457
3458 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3459 sizeof(struct packet_msg_prot),
3460 GFP_KERNEL, &pkt_dma_addr);
3461 if (!fence_pkt) {
3462 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03003463 "Failed to allocate packet for H/W queue %d testing\n",
3464 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003465 rc = -ENOMEM;
3466 goto free_fence_ptr;
3467 }
3468
Oded Gabbay65887292020-08-12 11:21:01 +03003469 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3470 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3471 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3472
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003473 fence_pkt->ctl = cpu_to_le32(tmp);
3474 fence_pkt->value = cpu_to_le32(fence_val);
3475 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3476
3477 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3478 sizeof(struct packet_msg_prot),
3479 pkt_dma_addr);
3480 if (rc) {
3481 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03003482 "Failed to send fence packet to H/W queue %d\n",
3483 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003484 goto free_pkt;
3485 }
3486
3487 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3488 1000, timeout_usec, true);
3489
3490 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3491
3492 if (rc == -ETIMEDOUT) {
3493 dev_err(hdev->dev,
3494 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3495 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3496 rc = -EIO;
3497 }
3498
3499free_pkt:
3500 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3501 pkt_dma_addr);
3502free_fence_ptr:
3503 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3504 fence_dma_addr);
3505 return rc;
3506}
3507
3508static int gaudi_test_cpu_queue(struct hl_device *hdev)
3509{
3510 struct gaudi_device *gaudi = hdev->asic_specific;
3511
3512 /*
3513 * check capability here as send_cpu_message() won't update the result
3514 * value if no capability
3515 */
3516 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3517 return 0;
3518
3519 return hl_fw_test_cpu_queue(hdev);
3520}
3521
3522static int gaudi_test_queues(struct hl_device *hdev)
3523{
3524 int i, rc, ret_val = 0;
3525
Ofir Bitton3abc99b2020-06-23 14:50:39 +03003526 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003527 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3528 rc = gaudi_test_queue(hdev, i);
3529 if (rc)
3530 ret_val = -EINVAL;
3531 }
3532 }
3533
3534 rc = gaudi_test_cpu_queue(hdev);
3535 if (rc)
3536 ret_val = -EINVAL;
3537
3538 return ret_val;
3539}
3540
3541static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3542 gfp_t mem_flags, dma_addr_t *dma_handle)
3543{
3544 void *kernel_addr;
3545
3546 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3547 return NULL;
3548
3549 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3550
3551 /* Shift to the device's base physical address of host memory */
3552 if (kernel_addr)
3553 *dma_handle += HOST_PHYS_BASE;
3554
3555 return kernel_addr;
3556}
3557
3558static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3559 dma_addr_t dma_addr)
3560{
3561 /* Cancel the device's base physical address of host memory */
3562 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3563
3564 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3565}
3566
3567static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3568 size_t size, dma_addr_t *dma_handle)
3569{
3570 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3571}
3572
3573static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3574 size_t size, void *vaddr)
3575{
3576 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3577}
3578
3579static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3580 int nents, enum dma_data_direction dir)
3581{
3582 struct scatterlist *sg;
3583 int i;
3584
3585 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3586 return -ENOMEM;
3587
3588 /* Shift to the device's base physical address of host memory */
3589 for_each_sg(sgl, sg, nents, i)
3590 sg->dma_address += HOST_PHYS_BASE;
3591
3592 return 0;
3593}
3594
3595static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3596 int nents, enum dma_data_direction dir)
3597{
3598 struct scatterlist *sg;
3599 int i;
3600
3601 /* Cancel the device's base physical address of host memory */
3602 for_each_sg(sgl, sg, nents, i)
3603 sg->dma_address -= HOST_PHYS_BASE;
3604
3605 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3606}
3607
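/*
 * Compute the space needed for the patched LIN_DMA packets: walk the SG
 * table, merge entries that are physically contiguous as long as the merged
 * size fits in DMA_MAX_TRANSFER_SIZE, and return the number of resulting
 * descriptors times the LIN_DMA packet size.
 */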
3608static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3609 struct sg_table *sgt)
3610{
3611 struct scatterlist *sg, *sg_next_iter;
3612 u32 count, dma_desc_cnt;
3613 u64 len, len_next;
3614 dma_addr_t addr, addr_next;
3615
3616 dma_desc_cnt = 0;
3617
3618 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3619
3620 len = sg_dma_len(sg);
3621 addr = sg_dma_address(sg);
3622
3623 if (len == 0)
3624 break;
3625
3626 while ((count + 1) < sgt->nents) {
3627 sg_next_iter = sg_next(sg);
3628 len_next = sg_dma_len(sg_next_iter);
3629 addr_next = sg_dma_address(sg_next_iter);
3630
3631 if (len_next == 0)
3632 break;
3633
3634 if ((addr + len == addr_next) &&
3635 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3636 len += len_next;
3637 count++;
3638 sg = sg_next_iter;
3639 } else {
3640 break;
3641 }
3642 }
3643
3644 dma_desc_cnt++;
3645 }
3646
3647 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3648}
3649
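/*
 * Pin and DMA-map the host memory referenced by a user LIN_DMA packet
 * (unless it is already pinned for this job) and add the size of the
 * descriptors it will expand to into the patched CB size.
 */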
3650static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3651 struct hl_cs_parser *parser,
3652 struct packet_lin_dma *user_dma_pkt,
3653 u64 addr, enum dma_data_direction dir)
3654{
3655 struct hl_userptr *userptr;
3656 int rc;
3657
3658 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3659 parser->job_userptr_list, &userptr))
3660 goto already_pinned;
3661
3662 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3663 if (!userptr)
3664 return -ENOMEM;
3665
3666 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3667 userptr);
3668 if (rc)
3669 goto free_userptr;
3670
3671 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3672
3673 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3674 userptr->sgt->nents, dir);
3675 if (rc) {
3676 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3677 goto unpin_memory;
3678 }
3679
3680 userptr->dma_mapped = true;
3681 userptr->dir = dir;
3682
3683already_pinned:
3684 parser->patched_cb_size +=
3685 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3686
3687 return 0;
3688
3689unpin_memory:
3690 hl_unpin_host_memory(hdev, userptr);
3691free_userptr:
3692 kfree(userptr);
3693 return rc;
3694}
3695
3696static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3697 struct hl_cs_parser *parser,
3698 struct packet_lin_dma *user_dma_pkt,
3699 bool src_in_host)
3700{
3701 enum dma_data_direction dir;
3702 bool skip_host_mem_pin = false, user_memset;
3703 u64 addr;
3704 int rc = 0;
3705
3706 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3707 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3708 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3709
3710 if (src_in_host) {
3711 if (user_memset)
3712 skip_host_mem_pin = true;
3713
3714 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3715 dir = DMA_TO_DEVICE;
3716 addr = le64_to_cpu(user_dma_pkt->src_addr);
3717 } else {
3718 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3719 dir = DMA_FROM_DEVICE;
3720 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3721 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3722 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3723 }
3724
3725 if (skip_host_mem_pin)
3726 parser->patched_cb_size += sizeof(*user_dma_pkt);
3727 else
3728 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3729 addr, dir);
3730
3731 return rc;
3732}
3733
3734static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3735 struct hl_cs_parser *parser,
3736 struct packet_lin_dma *user_dma_pkt)
3737{
3738 bool src_in_host = false;
3739 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3740 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3741 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3742
3743 dev_dbg(hdev->dev, "DMA packet details:\n");
3744 dev_dbg(hdev->dev, "source == 0x%llx\n",
3745 le64_to_cpu(user_dma_pkt->src_addr));
3746 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3747 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3748
3749 /*
3750 * Special handling for DMA with size 0. Bypass all validations
3751 * because no transactions will be done except for WR_COMP, which
3752 * is not a security issue
3753 */
3754 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3755 parser->patched_cb_size += sizeof(*user_dma_pkt);
3756 return 0;
3757 }
3758
3759 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3760 src_in_host = true;
3761
3762 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3763 src_in_host);
3764}
3765
Oded Gabbay64536ab2020-05-27 12:38:16 +03003766static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3767 struct hl_cs_parser *parser,
3768 struct packet_load_and_exe *user_pkt)
3769{
3770 u32 cfg;
3771
3772 cfg = le32_to_cpu(user_pkt->cfg);
3773
3774 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3775 dev_err(hdev->dev,
3776 "User not allowed to use Load and Execute\n");
3777 return -EPERM;
3778 }
3779
3780 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3781
3782 return 0;
3783}
3784
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003785static int gaudi_validate_cb(struct hl_device *hdev,
3786 struct hl_cs_parser *parser, bool is_mmu)
3787{
3788 u32 cb_parsed_length = 0;
3789 int rc = 0;
3790
3791 parser->patched_cb_size = 0;
3792
3793 /* cb_user_size is more than 0 so the loop will always be executed */
3794 while (cb_parsed_length < parser->user_cb_size) {
3795 enum packet_id pkt_id;
3796 u16 pkt_size;
3797 struct gaudi_packet *user_pkt;
3798
3799 user_pkt = (struct gaudi_packet *) (uintptr_t)
3800 (parser->user_cb->kernel_address + cb_parsed_length);
3801
3802 pkt_id = (enum packet_id) (
3803 (le64_to_cpu(user_pkt->header) &
3804 PACKET_HEADER_PACKET_ID_MASK) >>
3805 PACKET_HEADER_PACKET_ID_SHIFT);
3806
Ofir Bittonbc75be22020-07-30 14:56:38 +03003807 if (!validate_packet_id(pkt_id)) {
3808 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3809 rc = -EINVAL;
3810 break;
3811 }
3812
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003813 pkt_size = gaudi_packet_sizes[pkt_id];
3814 cb_parsed_length += pkt_size;
3815 if (cb_parsed_length > parser->user_cb_size) {
3816 dev_err(hdev->dev,
3817 "packet 0x%x is out of CB boundary\n", pkt_id);
3818 rc = -EINVAL;
3819 break;
3820 }
3821
3822 switch (pkt_id) {
3823 case PACKET_MSG_PROT:
3824 dev_err(hdev->dev,
3825 "User not allowed to use MSG_PROT\n");
3826 rc = -EPERM;
3827 break;
3828
3829 case PACKET_CP_DMA:
3830 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3831 rc = -EPERM;
3832 break;
3833
3834 case PACKET_STOP:
3835 dev_err(hdev->dev, "User not allowed to use STOP\n");
3836 rc = -EPERM;
3837 break;
3838
Oded Gabbay2edc66e2020-07-03 19:28:54 +03003839 case PACKET_WREG_BULK:
3840 dev_err(hdev->dev,
3841 "User not allowed to use WREG_BULK\n");
3842 rc = -EPERM;
3843 break;
3844
Oded Gabbay64536ab2020-05-27 12:38:16 +03003845 case PACKET_LOAD_AND_EXE:
3846 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3847 (struct packet_load_and_exe *) user_pkt);
3848 break;
3849
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003850 case PACKET_LIN_DMA:
3851 parser->contains_dma_pkt = true;
3852 if (is_mmu)
3853 parser->patched_cb_size += pkt_size;
3854 else
3855 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3856 (struct packet_lin_dma *) user_pkt);
3857 break;
3858
3859 case PACKET_WREG_32:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003860 case PACKET_MSG_LONG:
3861 case PACKET_MSG_SHORT:
3862 case PACKET_REPEAT:
3863 case PACKET_FENCE:
3864 case PACKET_NOP:
3865 case PACKET_ARB_POINT:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003866 parser->patched_cb_size += pkt_size;
3867 break;
3868
3869 default:
3870 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3871 pkt_id);
3872 rc = -EINVAL;
3873 break;
3874 }
3875
3876 if (rc)
3877 break;
3878 }
3879
3880 /*
3881 * The new CB should have space at the end for two MSG_PROT packets:
3882 * 1. A packet that will act as a completion packet
3883 * 2. A packet that will generate an MSI-X interrupt
3884 */
3885 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3886
3887 return rc;
3888}
3889
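/*
 * Expand a single user LIN_DMA packet into one packet per merged SG entry
 * of the pinned host memory. The engine barrier is kept only on the first
 * descriptor and write-completion is re-enabled only on the last one,
 * according to what the user originally requested.
 */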
3890static int gaudi_patch_dma_packet(struct hl_device *hdev,
3891 struct hl_cs_parser *parser,
3892 struct packet_lin_dma *user_dma_pkt,
3893 struct packet_lin_dma *new_dma_pkt,
3894 u32 *new_dma_pkt_size)
3895{
3896 struct hl_userptr *userptr;
3897 struct scatterlist *sg, *sg_next_iter;
3898 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3899 u64 len, len_next;
3900 dma_addr_t dma_addr, dma_addr_next;
3901 u64 device_memory_addr, addr;
3902 enum dma_data_direction dir;
3903 struct sg_table *sgt;
3904 bool src_in_host = false;
3905 bool skip_host_mem_pin = false;
3906 bool user_memset;
3907
3908 ctl = le32_to_cpu(user_dma_pkt->ctl);
3909
3910 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3911 src_in_host = true;
3912
3913 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3914 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3915
3916 if (src_in_host) {
3917 addr = le64_to_cpu(user_dma_pkt->src_addr);
3918 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3919 dir = DMA_TO_DEVICE;
3920 if (user_memset)
3921 skip_host_mem_pin = true;
3922 } else {
3923 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3924 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3925 dir = DMA_FROM_DEVICE;
3926 }
3927
3928 if ((!skip_host_mem_pin) &&
3929 (!hl_userptr_is_pinned(hdev, addr,
3930 le32_to_cpu(user_dma_pkt->tsize),
3931 parser->job_userptr_list, &userptr))) {
3932 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3933 addr, le32_to_cpu(user_dma_pkt->tsize));
3934 return -EFAULT;
3935 }
3936
3937 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3938 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3939 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3940 return 0;
3941 }
3942
3943 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3944
3945 sgt = userptr->sgt;
3946 dma_desc_cnt = 0;
3947
3948 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3949 len = sg_dma_len(sg);
3950 dma_addr = sg_dma_address(sg);
3951
3952 if (len == 0)
3953 break;
3954
3955 while ((count + 1) < sgt->nents) {
3956 sg_next_iter = sg_next(sg);
3957 len_next = sg_dma_len(sg_next_iter);
3958 dma_addr_next = sg_dma_address(sg_next_iter);
3959
3960 if (len_next == 0)
3961 break;
3962
3963 if ((dma_addr + len == dma_addr_next) &&
3964 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3965 len += len_next;
3966 count++;
3967 sg = sg_next_iter;
3968 } else {
3969 break;
3970 }
3971 }
3972
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003973 ctl = le32_to_cpu(user_dma_pkt->ctl);
3974 if (likely(dma_desc_cnt))
3975 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3976 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3977 new_dma_pkt->ctl = cpu_to_le32(ctl);
3978 new_dma_pkt->tsize = cpu_to_le32(len);
3979
3980 if (dir == DMA_TO_DEVICE) {
3981 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3982 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3983 } else {
3984 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3985 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3986 }
3987
3988 if (!user_memset)
3989 device_memory_addr += len;
3990 dma_desc_cnt++;
3991 new_dma_pkt++;
3992 }
3993
3994 if (!dma_desc_cnt) {
3995 dev_err(hdev->dev,
3996 "Error of 0 SG entries when patching DMA packet\n");
3997 return -EFAULT;
3998 }
3999
4000 /* Fix the last dma packet - wrcomp must be as user set it */
4001 new_dma_pkt--;
4002 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4003
4004 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4005
4006 return 0;
4007}
4008
4009static int gaudi_patch_cb(struct hl_device *hdev,
4010 struct hl_cs_parser *parser)
4011{
4012 u32 cb_parsed_length = 0;
4013 u32 cb_patched_cur_length = 0;
4014 int rc = 0;
4015
4016 /* cb_user_size is more than 0 so the loop will always be executed */
4017 while (cb_parsed_length < parser->user_cb_size) {
4018 enum packet_id pkt_id;
4019 u16 pkt_size;
4020 u32 new_pkt_size = 0;
4021 struct gaudi_packet *user_pkt, *kernel_pkt;
4022
4023 user_pkt = (struct gaudi_packet *) (uintptr_t)
4024 (parser->user_cb->kernel_address + cb_parsed_length);
4025 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
4026 (parser->patched_cb->kernel_address +
4027 cb_patched_cur_length);
4028
4029 pkt_id = (enum packet_id) (
4030 (le64_to_cpu(user_pkt->header) &
4031 PACKET_HEADER_PACKET_ID_MASK) >>
4032 PACKET_HEADER_PACKET_ID_SHIFT);
4033
Ofir Bittonbc75be22020-07-30 14:56:38 +03004034 if (!validate_packet_id(pkt_id)) {
4035 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4036 rc = -EINVAL;
4037 break;
4038 }
4039
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004040 pkt_size = gaudi_packet_sizes[pkt_id];
4041 cb_parsed_length += pkt_size;
4042 if (cb_parsed_length > parser->user_cb_size) {
4043 dev_err(hdev->dev,
4044 "packet 0x%x is out of CB boundary\n", pkt_id);
4045 rc = -EINVAL;
4046 break;
4047 }
4048
4049 switch (pkt_id) {
4050 case PACKET_LIN_DMA:
4051 rc = gaudi_patch_dma_packet(hdev, parser,
4052 (struct packet_lin_dma *) user_pkt,
4053 (struct packet_lin_dma *) kernel_pkt,
4054 &new_pkt_size);
4055 cb_patched_cur_length += new_pkt_size;
4056 break;
4057
4058 case PACKET_MSG_PROT:
4059 dev_err(hdev->dev,
4060 "User not allowed to use MSG_PROT\n");
4061 rc = -EPERM;
4062 break;
4063
4064 case PACKET_CP_DMA:
4065 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4066 rc = -EPERM;
4067 break;
4068
4069 case PACKET_STOP:
4070 dev_err(hdev->dev, "User not allowed to use STOP\n");
4071 rc = -EPERM;
4072 break;
4073
4074 case PACKET_WREG_32:
4075 case PACKET_WREG_BULK:
4076 case PACKET_MSG_LONG:
4077 case PACKET_MSG_SHORT:
4078 case PACKET_REPEAT:
4079 case PACKET_FENCE:
4080 case PACKET_NOP:
4081 case PACKET_ARB_POINT:
4082 case PACKET_LOAD_AND_EXE:
4083 memcpy(kernel_pkt, user_pkt, pkt_size);
4084 cb_patched_cur_length += pkt_size;
4085 break;
4086
4087 default:
4088 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4089 pkt_id);
4090 rc = -EINVAL;
4091 break;
4092 }
4093
4094 if (rc)
4095 break;
4096 }
4097
4098 return rc;
4099}
4100
4101static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4102 struct hl_cs_parser *parser)
4103{
4104 u64 patched_cb_handle;
4105 u32 patched_cb_size;
4106 struct hl_cb *user_cb;
4107 int rc;
4108
4109 /*
4110 * The new CB should have space at the end for two MSG_PROT packets:
4111 * 1. A packet that will act as a completion packet
4112 * 2. A packet that will generate an MSI interrupt
4113 */
4114 parser->patched_cb_size = parser->user_cb_size +
4115 sizeof(struct packet_msg_prot) * 2;
4116
Tomer Tayarfa8641a12020-09-07 17:36:41 +03004117 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03004118 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03004119 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004120
4121 if (rc) {
4122 dev_err(hdev->dev,
4123 "Failed to allocate patched CB for DMA CS %d\n",
4124 rc);
4125 return rc;
4126 }
4127
4128 patched_cb_handle >>= PAGE_SHIFT;
4129 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4130 (u32) patched_cb_handle);
4131 /* hl_cb_get should never fail here so use kernel WARN */
4132 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4133 (u32) patched_cb_handle);
4134 if (!parser->patched_cb) {
4135 rc = -EFAULT;
4136 goto out;
4137 }
4138
4139 /*
4140 * The check that parser->user_cb_size <= parser->user_cb->size was done
4141 * in validate_queue_index().
4142 */
4143 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4144 (void *) (uintptr_t) parser->user_cb->kernel_address,
4145 parser->user_cb_size);
4146
4147 patched_cb_size = parser->patched_cb_size;
4148
4149 /* Validate patched CB instead of user CB */
4150 user_cb = parser->user_cb;
4151 parser->user_cb = parser->patched_cb;
4152 rc = gaudi_validate_cb(hdev, parser, true);
4153 parser->user_cb = user_cb;
4154
4155 if (rc) {
4156 hl_cb_put(parser->patched_cb);
4157 goto out;
4158 }
4159
4160 if (patched_cb_size != parser->patched_cb_size) {
4161 dev_err(hdev->dev, "user CB size mismatch\n");
4162 hl_cb_put(parser->patched_cb);
4163 rc = -EINVAL;
4164 goto out;
4165 }
4166
4167out:
4168 /*
4169 * Always call cb destroy here because we still have 1 reference
4170 * to it by calling cb_get earlier. After the job is completed,
4171 * cb_put will release it, but here we want to remove it from the
4172 * IDR
4173 */
4174 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4175 patched_cb_handle << PAGE_SHIFT);
4176
4177 return rc;
4178}
4179
4180static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4181 struct hl_cs_parser *parser)
4182{
4183 u64 patched_cb_handle;
4184 int rc;
4185
4186 rc = gaudi_validate_cb(hdev, parser, false);
4187
4188 if (rc)
4189 goto free_userptr;
4190
Tomer Tayarfa8641a12020-09-07 17:36:41 +03004191 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
Tomer Tayaref6a0f62020-07-09 16:17:48 +03004192 parser->patched_cb_size, false, false,
Tomer Tayarfa8641a12020-09-07 17:36:41 +03004193 &patched_cb_handle);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004194 if (rc) {
4195 dev_err(hdev->dev,
4196 "Failed to allocate patched CB for DMA CS %d\n", rc);
4197 goto free_userptr;
4198 }
4199
4200 patched_cb_handle >>= PAGE_SHIFT;
4201 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4202 (u32) patched_cb_handle);
4203 /* hl_cb_get should never fail here so use kernel WARN */
4204 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4205 (u32) patched_cb_handle);
4206 if (!parser->patched_cb) {
4207 rc = -EFAULT;
4208 goto out;
4209 }
4210
4211 rc = gaudi_patch_cb(hdev, parser);
4212
4213 if (rc)
4214 hl_cb_put(parser->patched_cb);
4215
4216out:
4217 /*
4218 * Always call cb destroy here because we still have 1 reference
4219 * to it by calling cb_get earlier. After the job is completed,
4220 * cb_put will release it, but here we want to remove it from the
4221 * IDR
4222 */
4223 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4224 patched_cb_handle << PAGE_SHIFT);
4225
4226free_userptr:
4227 if (rc)
4228 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4229 return rc;
4230}
4231
4232static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4233 struct hl_cs_parser *parser)
4234{
4235 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4236
4237 /* For internal queue jobs just check if CB address is valid */
4238 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4239 parser->user_cb_size,
4240 asic_prop->sram_user_base_address,
4241 asic_prop->sram_end_address))
4242 return 0;
4243
4244 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4245 parser->user_cb_size,
4246 asic_prop->dram_user_base_address,
4247 asic_prop->dram_end_address))
4248 return 0;
4249
4250 /* PMMU and HPMMU addresses are equal, check only one of them */
4251 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4252 parser->user_cb_size,
4253 asic_prop->pmmu.start_addr,
4254 asic_prop->pmmu.end_addr))
4255 return 0;
4256
4257 dev_err(hdev->dev,
4258 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4259 parser->user_cb, parser->user_cb_size);
4260
4261 return -EFAULT;
4262}
4263
4264static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4265{
4266 struct gaudi_device *gaudi = hdev->asic_specific;
4267
4268 if (parser->queue_type == QUEUE_TYPE_INT)
4269 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4270
4271 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4272 return gaudi_parse_cb_mmu(hdev, parser);
4273 else
4274 return gaudi_parse_cb_no_mmu(hdev, parser);
4275}
4276
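/*
 * Append the two trailing MSG_PROT packets to a patched CB: the first
 * writes cq_val to cq_addr as the completion indication, the second writes
 * to the PCIE MSI registers to generate the interrupt (vector 0 unless
 * multi-MSI mode is enabled).
 */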
4277static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4278 u64 kernel_address, u32 len,
4279 u64 cq_addr, u32 cq_val, u32 msi_vec,
4280 bool eb)
4281{
4282 struct gaudi_device *gaudi = hdev->asic_specific;
4283 struct packet_msg_prot *cq_pkt;
4284 u32 tmp;
4285
4286 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4287 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4288
Oded Gabbay65887292020-08-12 11:21:01 +03004289 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4290 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004291
4292 if (eb)
Oded Gabbay65887292020-08-12 11:21:01 +03004293 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004294
4295 cq_pkt->ctl = cpu_to_le32(tmp);
4296 cq_pkt->value = cpu_to_le32(cq_val);
4297 cq_pkt->addr = cpu_to_le64(cq_addr);
4298
4299 cq_pkt++;
4300
Oded Gabbay65887292020-08-12 11:21:01 +03004301 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4302 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004303 cq_pkt->ctl = cpu_to_le32(tmp);
4304 cq_pkt->value = cpu_to_le32(1);
4305
4306 if (!gaudi->multi_msi_mode)
4307 msi_vec = 0;
4308
4309 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4310}
4311
4312static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4313{
4314 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4315}
4316
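/*
 * Fill a device memory range with a 64-bit value: build a kernel CB that
 * holds a single memset LIN_DMA packet and send it as a driver job on DMA
 * channel 0. The DMA0 error cause register is checked before and after the
 * transfer, and cleared if the device is still going through init.
 */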
4317static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4318 u32 size, u64 val)
4319{
4320 struct packet_lin_dma *lin_dma_pkt;
4321 struct hl_cs_job *job;
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03004322 u32 cb_size, ctl, err_cause;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004323 struct hl_cb *cb;
4324 int rc;
4325
Ofir Bittona04b7cd2020-07-13 13:36:55 +03004326 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004327 if (!cb)
4328 return -EFAULT;
4329
4330 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4331 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4332 cb_size = sizeof(*lin_dma_pkt);
4333
Oded Gabbay65887292020-08-12 11:21:01 +03004334 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4335 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4336 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4337 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4338 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4339
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004340 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4341 lin_dma_pkt->src_addr = cpu_to_le64(val);
4342 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4343 lin_dma_pkt->tsize = cpu_to_le32(size);
4344
4345 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4346 if (!job) {
4347 dev_err(hdev->dev, "Failed to allocate a new job\n");
4348 rc = -ENOMEM;
4349 goto release_cb;
4350 }
4351
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03004352 /* Verify DMA is OK */
4353 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4354 if (err_cause && !hdev->init_done) {
4355 dev_dbg(hdev->dev,
4356 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4357 err_cause);
4358 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4359 }
4360
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004361 job->id = 0;
4362 job->user_cb = cb;
4363 job->user_cb->cs_cnt++;
4364 job->user_cb_size = cb_size;
4365 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4366 job->patched_cb = job->user_cb;
4367 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4368
4369 hl_debugfs_add_job(hdev, job);
4370
4371 rc = gaudi_send_job_on_qman0(hdev, job);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004372 hl_debugfs_remove_job(hdev, job);
4373 kfree(job);
4374 cb->cs_cnt--;
4375
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03004376 /* Verify DMA is OK */
4377 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4378 if (err_cause) {
4379 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4380 rc = -EIO;
4381 if (!hdev->init_done) {
4382 dev_dbg(hdev->dev,
4383 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4384 err_cause);
4385 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4386 }
4387 }
4388
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004389release_cb:
4390 hl_cb_put(cb);
4391 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4392
4393 return rc;
4394}
4395
4396static void gaudi_restore_sm_registers(struct hl_device *hdev)
4397{
4398 int i;
4399
4400 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4401 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4402 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4403 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4404 }
4405
4406 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4407 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4408 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4409 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4410 }
4411
4412 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4413
4414 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4415 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4416
4417 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4418
4419 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4420 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4421}
4422
4423static void gaudi_restore_dma_registers(struct hl_device *hdev)
4424{
4425 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4426 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4427 int i;
4428
4429 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4430 u64 sob_addr = CFG_BASE +
4431 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4432 (i * sob_delta);
4433 u32 dma_offset = i * DMA_CORE_OFFSET;
4434
4435 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4436 lower_32_bits(sob_addr));
4437 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4438 upper_32_bits(sob_addr));
4439 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4440
4441 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4442 * modified by the user for SRAM reduction
4443 */
4444 if (i > 1)
4445 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4446 0x00000001);
4447 }
4448}
4449
4450static void gaudi_restore_qm_registers(struct hl_device *hdev)
4451{
4452 u32 qman_offset;
4453 int i;
4454
4455 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4456 qman_offset = i * DMA_QMAN_OFFSET;
4457 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4458 }
4459
4460 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4461 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4462 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4463 }
4464
4465 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4466 qman_offset = i * TPC_QMAN_OFFSET;
4467 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4468 }
4469}
4470
4471static void gaudi_restore_user_registers(struct hl_device *hdev)
4472{
4473 gaudi_restore_sm_registers(hdev);
4474 gaudi_restore_dma_registers(hdev);
4475 gaudi_restore_qm_registers(hdev);
4476}
4477
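/*
 * Context switch: scrub the user SRAM region with a fixed pattern, program
 * the new ASID in all engine MMU registers and restore the registers that
 * user space is allowed to modify.
 */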
4478static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4479{
4480 struct asic_fixed_properties *prop = &hdev->asic_prop;
4481 u64 addr = prop->sram_user_base_address;
4482 u32 size = hdev->pldm ? 0x10000 :
4483 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4484 u64 val = 0x7777777777777777ull;
4485 int rc;
4486
4487 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4488 if (rc) {
4489 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4490 return rc;
4491 }
4492
4493 gaudi_mmu_prepare(hdev, asid);
4494
4495 gaudi_restore_user_registers(hdev);
4496
4497 return 0;
4498}
4499
4500static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4501{
4502 struct asic_fixed_properties *prop = &hdev->asic_prop;
4503 struct gaudi_device *gaudi = hdev->asic_specific;
4504 u64 addr = prop->mmu_pgt_addr;
4505 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4506
4507 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4508 return 0;
4509
4510 return gaudi_memset_device_memory(hdev, addr, size, 0);
4511}
4512
4513static void gaudi_restore_phase_topology(struct hl_device *hdev)
4514{
4515
4516}
4517
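/*
 * The debugfs read/write helpers below decode the address the same way:
 * CFG space goes through register accessors (refused while clock gating is
 * enabled), SRAM through its PCI BAR, HBM by temporarily moving the HBM BAR,
 * and host physical memory is touched directly only when no IOMMU is present.
 */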
4518static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4519{
4520 struct asic_fixed_properties *prop = &hdev->asic_prop;
4521 struct gaudi_device *gaudi = hdev->asic_specific;
4522 u64 hbm_bar_addr;
4523 int rc = 0;
4524
4525 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004526
4527 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4528 (hdev->clock_gating_mask &
4529 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4530
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004531 dev_err_ratelimited(hdev->dev,
4532 "Can't read register - clock gating is enabled!\n");
4533 rc = -EFAULT;
4534 } else {
4535 *val = RREG32(addr - CFG_BASE);
4536 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004537
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004538 } else if ((addr >= SRAM_BASE_ADDR) &&
4539 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4540 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4541 (addr - SRAM_BASE_ADDR));
4542 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4543 u64 bar_base_addr = DRAM_PHYS_BASE +
4544 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4545
4546 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4547 if (hbm_bar_addr != U64_MAX) {
4548 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4549 (addr - bar_base_addr));
4550
4551 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4552 hbm_bar_addr);
4553 }
4554 if (hbm_bar_addr == U64_MAX)
4555 rc = -EIO;
4556 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4557 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4558 } else {
4559 rc = -EFAULT;
4560 }
4561
4562 return rc;
4563}
4564
4565static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4566{
4567 struct asic_fixed_properties *prop = &hdev->asic_prop;
4568 struct gaudi_device *gaudi = hdev->asic_specific;
4569 u64 hbm_bar_addr;
4570 int rc = 0;
4571
4572 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004573
4574 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4575 (hdev->clock_gating_mask &
4576 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4577
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004578 dev_err_ratelimited(hdev->dev,
4579 "Can't write register - clock gating is enabled!\n");
4580 rc = -EFAULT;
4581 } else {
4582 WREG32(addr - CFG_BASE, val);
4583 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004584
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004585 } else if ((addr >= SRAM_BASE_ADDR) &&
4586 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4587 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4588 (addr - SRAM_BASE_ADDR));
4589 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4590 u64 bar_base_addr = DRAM_PHYS_BASE +
4591 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4592
4593 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4594 if (hbm_bar_addr != U64_MAX) {
4595 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4596 (addr - bar_base_addr));
4597
4598 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4599 hbm_bar_addr);
4600 }
4601 if (hbm_bar_addr == U64_MAX)
4602 rc = -EIO;
4603 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4604 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4605 } else {
4606 rc = -EFAULT;
4607 }
4608
4609 return rc;
4610}
4611
4612static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4613{
4614 struct asic_fixed_properties *prop = &hdev->asic_prop;
4615 struct gaudi_device *gaudi = hdev->asic_specific;
4616 u64 hbm_bar_addr;
4617 int rc = 0;
4618
4619 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004620
4621 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4622 (hdev->clock_gating_mask &
4623 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4624
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004625 dev_err_ratelimited(hdev->dev,
4626 "Can't read register - clock gating is enabled!\n");
4627 rc = -EFAULT;
4628 } else {
4629 u32 val_l = RREG32(addr - CFG_BASE);
4630 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4631
4632 *val = (((u64) val_h) << 32) | val_l;
4633 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004634
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004635 } else if ((addr >= SRAM_BASE_ADDR) &&
4636 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4637 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4638 (addr - SRAM_BASE_ADDR));
4639 } else if (addr <=
4640 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4641 u64 bar_base_addr = DRAM_PHYS_BASE +
4642 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4643
4644 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4645 if (hbm_bar_addr != U64_MAX) {
4646 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4647 (addr - bar_base_addr));
4648
4649 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4650 hbm_bar_addr);
4651 }
4652 if (hbm_bar_addr == U64_MAX)
4653 rc = -EIO;
4654 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4655 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4656 } else {
4657 rc = -EFAULT;
4658 }
4659
4660 return rc;
4661}
4662
4663static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4664{
4665 struct asic_fixed_properties *prop = &hdev->asic_prop;
4666 struct gaudi_device *gaudi = hdev->asic_specific;
4667 u64 hbm_bar_addr;
4668 int rc = 0;
4669
4670 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004671
4672 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4673 (hdev->clock_gating_mask &
4674 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4675
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004676 dev_err_ratelimited(hdev->dev,
4677 "Can't write register - clock gating is enabled!\n");
4678 rc = -EFAULT;
4679 } else {
4680 WREG32(addr - CFG_BASE, lower_32_bits(val));
4681 WREG32(addr + sizeof(u32) - CFG_BASE,
4682 upper_32_bits(val));
4683 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004684
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004685 } else if ((addr >= SRAM_BASE_ADDR) &&
4686 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4687 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4688 (addr - SRAM_BASE_ADDR));
4689 } else if (addr <=
4690 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4691 u64 bar_base_addr = DRAM_PHYS_BASE +
4692 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4693
4694 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4695 if (hbm_bar_addr != U64_MAX) {
4696 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4697 (addr - bar_base_addr));
4698
4699 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4700 hbm_bar_addr);
4701 }
4702 if (hbm_bar_addr == U64_MAX)
4703 rc = -EIO;
4704 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4705 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4706 } else {
4707 rc = -EFAULT;
4708 }
4709
4710 return rc;
4711}
4712
4713static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4714{
4715 struct gaudi_device *gaudi = hdev->asic_specific;
4716
4717 if (hdev->hard_reset_pending)
4718 return U64_MAX;
4719
4720 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4721 (addr - gaudi->hbm_bar_cur_addr));
4722}
4723
4724static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4725{
4726 struct gaudi_device *gaudi = hdev->asic_specific;
4727
4728 if (hdev->hard_reset_pending)
4729 return;
4730
4731 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4732 (addr - gaudi->hbm_bar_cur_addr));
4733}
4734
4735static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4736{
4737 /* mask to zero the MMBP and ASID bits */
4738 WREG32_AND(reg, ~0x7FF);
4739 WREG32_OR(reg, asid);
4740}
4741
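/*
 * Program the given ASID into every engine's QMAN/CORE/CFG user registers
 * (clearing the MMU-bypass bit along the way) so all engine transactions
 * are translated by the MMU under that ASID. Clock gating is disabled
 * around the register writes and restored afterwards.
 */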
4742static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4743{
4744 struct gaudi_device *gaudi = hdev->asic_specific;
4745
4746 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4747 return;
4748
4749 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4750 WARN(1, "asid %u is too big\n", asid);
4751 return;
4752 }
4753
4754 mutex_lock(&gaudi->clk_gate_mutex);
4755
4756 hdev->asic_funcs->disable_clock_gating(hdev);
4757
4758 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4759 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4760 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4761 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4762 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4763
4764 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4765 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4766 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4767 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4768 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4769
4770 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4771 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4772 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4773 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4774 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4775
4776 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4777 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4778 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4779 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4780 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4781
4782 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4783 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4784 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4785 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4786 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4787
4788 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4789 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4790 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4791 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4792 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4793
4794 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4795 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4796 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4797 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4798 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4799
4800 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4801 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4802 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4803 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4804 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4805
4806 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4807 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4808 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4809 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4810 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4811 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4812 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4813 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4814
4815 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4816 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4817 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4818 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4819 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4820 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4821 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4822
4823 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4824 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4825 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4826 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4827 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4828 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4829 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4830
4831 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4832 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4833 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4834 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4835 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4836 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4837 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4838
4839 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4840 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4841 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4842 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4843 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4844 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4845 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4846
4847 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4848 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4849 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4850 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4851 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4852 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4853 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4854
4855 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4856 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4857 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4858 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4859 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4860 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4861 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4862
4863 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4864 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4865 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4866 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4867 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4868 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4869 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4870
4871 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4872 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4873 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4874 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4875 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4876 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4877 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4878
4879 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4880 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4881 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4882 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4883 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4884 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4885 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4886 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4887 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4888 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4889
4890 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4891 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4892 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4893 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4894 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4895 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4896 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4897 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4898 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4899 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4900 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4901 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4902
4903 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4904 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4905
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004906 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004907
4908 mutex_unlock(&gaudi->clk_gate_mutex);
4909}
4910
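/*
 * Submit a driver-internal job on QMAN0 of the first PCI DMA channel. A
 * rough sketch of the flow below: require an idle device, allocate a 4-byte
 * fence from the DMA pool, fill the last MSG_PROT slot of the patched CB so
 * it writes GAUDI_QMAN0_FENCE_VAL to that fence, flip the DMA core PROT bit
 * for the duration of the job, send the CB without completion and poll the
 * fence until it is written or the timeout expires.
 */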
4911static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4912 struct hl_cs_job *job)
4913{
4914 struct packet_msg_prot *fence_pkt;
4915 u32 *fence_ptr;
4916 dma_addr_t fence_dma_addr;
4917 struct hl_cb *cb;
4918 u32 tmp, timeout, dma_offset;
4919 int rc;
4920
4921 if (hdev->pldm)
4922 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4923 else
4924 timeout = HL_DEVICE_TIMEOUT_USEC;
4925
4926 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4927 dev_err_ratelimited(hdev->dev,
4928 "Can't send driver job on QMAN0 because the device is not idle\n");
4929 return -EBUSY;
4930 }
4931
4932 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4933 &fence_dma_addr);
4934 if (!fence_ptr) {
4935 dev_err(hdev->dev,
4936 "Failed to allocate fence memory for QMAN0\n");
4937 return -ENOMEM;
4938 }
4939
4940 cb = job->patched_cb;
4941
4942 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4943 job->job_cb_size - sizeof(struct packet_msg_prot));
4944
Oded Gabbay65887292020-08-12 11:21:01 +03004945 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4946 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4947 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4948
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004949 fence_pkt->ctl = cpu_to_le32(tmp);
4950 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4951 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4952
4953 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4954
4955 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4956
4957 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4958 job->job_cb_size, cb->bus_address);
4959 if (rc) {
4960 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4961 goto free_fence_ptr;
4962 }
4963
4964 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4965 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4966 timeout, true);
4967
4968 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4969
4970 if (rc == -ETIMEDOUT) {
4971 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4972 goto free_fence_ptr;
4973 }
4974
4975free_fence_ptr:
4976 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4977 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4978
4979 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4980 fence_dma_addr);
4981 return rc;
4982}
4983
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004984static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4985{
Ofir Bittonebd8d122020-05-10 13:41:28 +03004986 if (event_type >= GAUDI_EVENT_SIZE)
4987 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004988
Ofir Bittonebd8d122020-05-10 13:41:28 +03004989 if (!gaudi_irq_map_table[event_type].valid)
4990 goto event_not_supported;
4991
4992	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
4993
4994 return;
4995
4996event_not_supported:
4997 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004998}
4999
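/*
 * Each DMA_IF RAZWI initiator ID is shared by two DMA engines. To narrow
 * down the guilty engine, read the ERR_CAUSE register of both candidates
 * and report the one whose HBW read/write error bit is set; if the
 * registers are inconclusive, report both candidates.
 */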
5000static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5001 u32 x_y, bool is_write)
5002{
5003 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5004
5005 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5006 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5007
5008 switch (x_y) {
5009 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5010 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5011 dma_id[0] = 0;
5012 dma_id[1] = 2;
5013 break;
5014 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5015 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5016 dma_id[0] = 1;
5017 dma_id[1] = 3;
5018 break;
5019 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5020 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5021 dma_id[0] = 4;
5022 dma_id[1] = 6;
5023 break;
5024 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5025 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5026 dma_id[0] = 5;
5027 dma_id[1] = 7;
5028 break;
5029 default:
5030 goto unknown_initiator;
5031 }
5032
5033 for (i = 0 ; i < 2 ; i++) {
5034 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5035 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5036 }
5037
5038 switch (x_y) {
5039 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5040 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5041 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5042 return "DMA0";
5043 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5044 return "DMA2";
5045 else
5046 return "DMA0 or DMA2";
5047 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5048 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5049 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5050 return "DMA1";
5051 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5052 return "DMA3";
5053 else
5054 return "DMA1 or DMA3";
5055 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5056 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5057 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5058 return "DMA4";
5059 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5060 return "DMA6";
5061 else
5062 return "DMA4 or DMA6";
5063 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5064 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5065 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5066 return "DMA5";
5067 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5068 return "DMA7";
5069 else
5070 return "DMA5 or DMA7";
5071 }
5072
5073unknown_initiator:
5074 return "unknown initiator";
5075}
5076
5077static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5078 bool is_write)
5079{
5080 u32 val, x_y, axi_id;
5081
5082 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5083 RREG32(mmMMU_UP_RAZWI_READ_ID);
5084 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5085 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5086 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5087 RAZWI_INITIATOR_AXI_ID_SHIFT);
5088
5089 switch (x_y) {
5090 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5091 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5092 return "TPC0";
5093 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5094 return "NIC0";
5095 break;
5096 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5097 return "TPC1";
5098 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5099 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5100 return "MME0";
5101 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5102 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5103 return "MME1";
5104 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5105 return "TPC2";
5106 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5107 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5108 return "TPC3";
5109 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5110 return "PCI";
5111 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5112 return "CPU";
5113 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5114 return "PSOC";
5115 break;
5116 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5117 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5118 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5119 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5120 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5121 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5122 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5123 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5124 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5125 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5126 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5127 return "TPC4";
5128 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5129 return "NIC1";
5130 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5131 return "NIC2";
5132 break;
5133 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5134 return "TPC5";
5135 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5136 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5137 return "MME2";
5138 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5139 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5140 return "MME3";
5141 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5142 return "TPC6";
5143 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5144 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5145 return "TPC7";
5146 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5147 return "NIC4";
5148 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5149 return "NIC5";
5150 break;
5151 default:
5152 break;
5153 }
5154
5155 dev_err(hdev->dev,
5156 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5157 val,
5158 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5159 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5160 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5161 RAZWI_INITIATOR_AXI_ID_MASK);
5162
5163 return "unknown initiator";
5164}
5165
5166static void gaudi_print_razwi_info(struct hl_device *hdev)
5167{
5168 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5169 dev_err_ratelimited(hdev->dev,
5170 "RAZWI event caused by illegal write of %s\n",
5171 gaudi_get_razwi_initiator_name(hdev, true));
5172 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5173 }
5174
5175 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5176 dev_err_ratelimited(hdev->dev,
5177 "RAZWI event caused by illegal read of %s\n",
5178 gaudi_get_razwi_initiator_name(hdev, false));
5179 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5180 }
5181}
5182
5183static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5184{
5185 struct gaudi_device *gaudi = hdev->asic_specific;
5186 u64 addr;
5187 u32 val;
5188
5189 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5190 return;
5191
5192 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5193 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5194 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5195 addr <<= 32;
5196 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5197
5198 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5199 addr);
5200
5201 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5202 }
5203
5204 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5205 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5206 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5207 addr <<= 32;
5208 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5209
5210 dev_err_ratelimited(hdev->dev,
5211 "MMU access error on va 0x%llx\n", addr);
5212
5213 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5214 }
5215}
5216
5217/*
5218 * +-------------------+------------------------------------------------------+
5219 * | Configuration Reg | Description |
5220 * | Address | |
5221 * +-------------------+------------------------------------------------------+
5222 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5223 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5224 * | |0xF34 memory wrappers 63:32 |
5225 * | |0xF38 memory wrappers 95:64 |
5226 * | |0xF3C memory wrappers 127:96 |
5227 * +-------------------+------------------------------------------------------+
5228 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5229 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5230 * | |0xF44 memory wrappers 63:32 |
5231 * | |0xF48 memory wrappers 95:64 |
5232 * | |0xF4C memory wrappers 127:96 |
5233 * +-------------------+------------------------------------------------------+
5234 */
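/*
 * Extract ECC information using the block registers described above: scan
 * the serr/derr indication registers (32 memory wrappers per register) for
 * the single set bit, select that wrapper, read the captured address and
 * syndrome, and finally clear the error indication.
 */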
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005235static int gaudi_extract_ecc_info(struct hl_device *hdev,
5236 struct ecc_info_extract_params *params, u64 *ecc_address,
5237 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005238{
5239 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005240 u32 i, num_mem_regs, reg, err_bit;
5241 u64 err_addr, err_word = 0;
5242 int rc = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005243
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005244 num_mem_regs = params->num_memories / 32 +
5245 ((params->num_memories % 32) ? 1 : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005246
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005247 if (params->block_address >= CFG_BASE)
5248 params->block_address -= CFG_BASE;
5249
5250 if (params->derr)
5251 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005252 else
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005253 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005254
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005255 if (params->disable_clock_gating) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005256 mutex_lock(&gaudi->clk_gate_mutex);
5257 hdev->asic_funcs->disable_clock_gating(hdev);
5258 }
5259
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005260 /* Set invalid wrapper index */
5261 *memory_wrapper_idx = 0xFF;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005262
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005263 /* Iterate through memory wrappers, a single bit must be set */
Dan Carpenterb0353542020-08-05 12:51:05 +03005264 for (i = 0 ; i < num_mem_regs ; i++) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005265		err_word = RREG32(err_addr + i * 4);
5267 if (err_word) {
5268 err_bit = __ffs(err_word);
5269 *memory_wrapper_idx = err_bit + (32 * i);
5270 break;
5271 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005272 }
5273
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005274 if (*memory_wrapper_idx == 0xFF) {
5275 dev_err(hdev->dev, "ECC error information cannot be found\n");
5276 rc = -EINVAL;
5277 goto enable_clk_gate;
5278 }
5279
5280 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5281 *memory_wrapper_idx);
5282
5283 *ecc_address =
5284 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5285 *ecc_syndrom =
5286 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5287
5288 /* Clear error indication */
5289 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5290 if (params->derr)
5291 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5292 else
5293 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5294
5295 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5296
5297enable_clk_gate:
5298 if (params->disable_clock_gating) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005299 hdev->asic_funcs->set_clock_gating(hdev);
Greg Kroah-Hartman65a9bde62020-07-27 11:49:37 +02005300
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005301 mutex_unlock(&gaudi->clk_gate_mutex);
5302 }
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005303
5304 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005305}
5306
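/*
 * Decode QMAN errors: walk the GLBL_STS1 register of every stream plus the
 * lower CP, print each asserted error cause, clear the handled bits
 * (write-1-to-clear), then decode the ARB_ERR_CAUSE register separately.
 */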
5307static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5308 const char *qm_name,
5309 u64 glbl_sts_addr,
5310 u64 arb_err_addr)
5311{
5312 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5313 char reg_desc[32];
5314
5315 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5316 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5317 glbl_sts_clr_val = 0;
5318 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5319
5320 if (!glbl_sts_val)
5321 continue;
5322
5323 if (i == QMAN_STREAMS)
5324 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5325 else
5326 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5327
5328 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5329 if (glbl_sts_val & BIT(j)) {
5330 dev_err_ratelimited(hdev->dev,
5331 "%s %s. err cause: %s\n",
5332 qm_name, reg_desc,
5333 gaudi_qman_error_cause[j]);
5334 glbl_sts_clr_val |= BIT(j);
5335 }
5336 }
5337
5338 /* Write 1 clear errors */
5339 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5340 }
5341
5342 arb_err_val = RREG32(arb_err_addr);
5343
5344 if (!arb_err_val)
5345 return;
5346
5347 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5348 if (arb_err_val & BIT(j)) {
5349 dev_err_ratelimited(hdev->dev,
5350 "%s ARB_ERR. err cause: %s\n",
5351 qm_name,
5352 gaudi_qman_arb_error_cause[j]);
5353 }
5354 }
5355}
5356
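/*
 * Handle an ECC event. For events that the firmware already parsed, the
 * address, syndrome and wrapper index are taken from the EQ entry; for the
 * TPC/MME events listed below, the driver extracts the information itself
 * from the relevant configuration block via gaudi_extract_ecc_info().
 */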
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005357static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5358 struct hl_eq_ecc_data *ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005359{
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005360 struct ecc_info_extract_params params;
5361 u64 ecc_address = 0, ecc_syndrom = 0;
5362 u8 index, memory_wrapper_idx = 0;
5363 bool extract_info_from_fw;
5364 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005365
5366 switch (event_type) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005367 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5368 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5369 extract_info_from_fw = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005370 break;
5371 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5372 index = event_type - GAUDI_EVENT_TPC0_SERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005373 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5374 params.num_memories = 90;
5375 params.derr = false;
5376 params.disable_clock_gating = true;
5377 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005378 break;
5379 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5380 index = event_type - GAUDI_EVENT_TPC0_DERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005381 params.block_address =
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005382 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005383 params.num_memories = 90;
5384 params.derr = true;
5385 params.disable_clock_gating = true;
5386 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005387 break;
5388 case GAUDI_EVENT_MME0_ACC_SERR:
5389 case GAUDI_EVENT_MME1_ACC_SERR:
5390 case GAUDI_EVENT_MME2_ACC_SERR:
5391 case GAUDI_EVENT_MME3_ACC_SERR:
5392 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005393 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5394 params.num_memories = 128;
5395 params.derr = false;
5396 params.disable_clock_gating = true;
5397 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005398 break;
5399 case GAUDI_EVENT_MME0_ACC_DERR:
5400 case GAUDI_EVENT_MME1_ACC_DERR:
5401 case GAUDI_EVENT_MME2_ACC_DERR:
5402 case GAUDI_EVENT_MME3_ACC_DERR:
5403 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005404 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5405 params.num_memories = 128;
5406 params.derr = true;
5407 params.disable_clock_gating = true;
5408 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005409 break;
5410 case GAUDI_EVENT_MME0_SBAB_SERR:
5411 case GAUDI_EVENT_MME1_SBAB_SERR:
5412 case GAUDI_EVENT_MME2_SBAB_SERR:
5413 case GAUDI_EVENT_MME3_SBAB_SERR:
5414 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005415 params.block_address =
5416 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5417 params.num_memories = 33;
5418 params.derr = false;
5419 params.disable_clock_gating = true;
5420 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005421 break;
5422 case GAUDI_EVENT_MME0_SBAB_DERR:
5423 case GAUDI_EVENT_MME1_SBAB_DERR:
5424 case GAUDI_EVENT_MME2_SBAB_DERR:
5425 case GAUDI_EVENT_MME3_SBAB_DERR:
5426 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005427 params.block_address =
5428 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5429 params.num_memories = 33;
5430 params.derr = true;
5431		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005432 default:
5433 return;
5434 }
5435
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005436 if (extract_info_from_fw) {
5437 ecc_address = le64_to_cpu(ecc_data->ecc_address);
5438 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5439 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5440 } else {
5441 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5442 &ecc_syndrom, &memory_wrapper_idx);
5443 if (rc)
5444 return;
5445 }
5446
5447 dev_err(hdev->dev,
5448		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
5449 ecc_address, ecc_syndrom, memory_wrapper_idx);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005450}
5451
5452static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5453{
5454 u64 glbl_sts_addr, arb_err_addr;
5455 u8 index;
5456 char desc[32];
5457
5458 switch (event_type) {
5459 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5460 index = event_type - GAUDI_EVENT_TPC0_QM;
5461 glbl_sts_addr =
5462 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5463 arb_err_addr =
5464 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5465 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5466 break;
5467 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5468 index = event_type - GAUDI_EVENT_MME0_QM;
5469 glbl_sts_addr =
5470 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5471 arb_err_addr =
5472 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5473 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5474 break;
5475 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5476 index = event_type - GAUDI_EVENT_DMA0_QM;
5477 glbl_sts_addr =
5478 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5479 arb_err_addr =
5480 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5481 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5482 break;
5483 default:
5484 return;
5485 }
5486
5487 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5488}
5489
5490static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5491 bool razwi)
5492{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005493 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005494
5495 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5496 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5497 event_type, desc);
5498
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005499 if (razwi) {
5500 gaudi_print_razwi_info(hdev);
5501 gaudi_print_mmu_error_info(hdev);
5502 }
5503}
5504
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005505static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5506{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005507 struct gaudi_device *gaudi = hdev->asic_specific;
5508
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005509 /* Unmask all IRQs since some could have been received
5510 * during the soft reset
5511 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03005512 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005513}
5514
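/*
 * Dump HBM interrupt information for a single HBM device: for each channel,
 * report the parity/ECC indications and captured ECC counters of both of
 * its pseudo-channels, then clear the interrupts. The MC SRAM SERR/DERR
 * summary registers are checked at the end.
 */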
5515static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5516{
5517 int ch, err = 0;
5518 u32 base, val, val2;
5519
5520 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5521 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5522 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5523 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5524 if (val) {
5525 err = 1;
5526 dev_err(hdev->dev,
5527 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5528 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5529 (val >> 2) & 0x1, (val >> 3) & 0x1,
5530 (val >> 4) & 0x1);
5531
5532 val2 = RREG32(base + ch * 0x1000 + 0x060);
5533 dev_err(hdev->dev,
5534 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5535 device, ch * 2,
5536 RREG32(base + ch * 0x1000 + 0x064),
5537 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5538 (val2 & 0xFF0000) >> 16,
5539 (val2 & 0xFF000000) >> 24);
5540 }
5541
5542 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5543 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5544 if (val) {
5545 err = 1;
5546 dev_err(hdev->dev,
5547 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5548 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5549 (val >> 2) & 0x1, (val >> 3) & 0x1,
5550 (val >> 4) & 0x1);
5551
5552 val2 = RREG32(base + ch * 0x1000 + 0x070);
5553 dev_err(hdev->dev,
5554 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5555 device, ch * 2 + 1,
5556 RREG32(base + ch * 0x1000 + 0x074),
5557 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5558 (val2 & 0xFF0000) >> 16,
5559 (val2 & 0xFF000000) >> 24);
5560 }
5561
5562 /* Clear interrupts */
5563 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5564 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5565 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5566 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5567 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5568 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5569 }
5570
5571 val = RREG32(base + 0x8F30);
5572 val2 = RREG32(base + 0x8F34);
5573 if (val | val2) {
5574 err = 1;
5575 dev_err(hdev->dev,
5576 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5577 device, val, val2);
5578 }
5579 val = RREG32(base + 0x8F40);
5580 val2 = RREG32(base + 0x8F44);
5581 if (val | val2) {
5582 err = 1;
5583 dev_err(hdev->dev,
5584 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5585 device, val, val2);
5586 }
5587
5588 return err;
5589}
5590
5591static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5592{
5593 switch (hbm_event_type) {
5594 case GAUDI_EVENT_HBM0_SPI_0:
5595 case GAUDI_EVENT_HBM0_SPI_1:
5596 return 0;
5597 case GAUDI_EVENT_HBM1_SPI_0:
5598 case GAUDI_EVENT_HBM1_SPI_1:
5599 return 1;
5600 case GAUDI_EVENT_HBM2_SPI_0:
5601 case GAUDI_EVENT_HBM2_SPI_1:
5602 return 2;
5603 case GAUDI_EVENT_HBM3_SPI_0:
5604 case GAUDI_EVENT_HBM3_SPI_1:
5605 return 3;
5606 default:
5607 break;
5608 }
5609
5610 /* Should never happen */
5611 return 0;
5612}
5613
5614static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5615 char *interrupt_name)
5616{
5617 struct gaudi_device *gaudi = hdev->asic_specific;
5618 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5619 bool soft_reset_required = false;
5620
5621 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
Oded Gabbay6138bbe2020-09-04 20:18:16 +03005622 * gating, and thus cannot be done in CPU-CP and should be done instead
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005623 * by the driver.
5624 */
5625
5626 mutex_lock(&gaudi->clk_gate_mutex);
5627
5628 hdev->asic_funcs->disable_clock_gating(hdev);
5629
5630 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5631 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5632
5633 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5634 if (tpc_interrupts_cause & BIT(i)) {
5635 dev_err_ratelimited(hdev->dev,
5636 "TPC%d_%s interrupt cause: %s\n",
5637 tpc_id, interrupt_name,
5638 gaudi_tpc_interrupts_cause[i]);
5639 /* If this is QM error, we need to soft-reset */
5640 if (i == 15)
5641 soft_reset_required = true;
5642 }
5643
5644 /* Clear interrupts */
5645 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5646
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005647 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005648
5649 mutex_unlock(&gaudi->clk_gate_mutex);
5650
5651 return soft_reset_required;
5652}
5653
5654static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5655{
5656 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5657}
5658
5659static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5660{
5661 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5662}
5663
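/* Log firmware clock-throttling events and track the throttling reason. */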
5664static void gaudi_print_clk_change_info(struct hl_device *hdev,
5665 u16 event_type)
5666{
5667 switch (event_type) {
5668 case GAUDI_EVENT_FIX_POWER_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005669 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005670 dev_info_ratelimited(hdev->dev,
5671 "Clock throttling due to power consumption\n");
5672 break;
5673
5674 case GAUDI_EVENT_FIX_POWER_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005675 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005676 dev_info_ratelimited(hdev->dev,
5677			"Power envelope is safe, back to optimal clock\n");
5678 break;
5679
5680 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005681 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005682 dev_info_ratelimited(hdev->dev,
5683 "Clock throttling due to overheating\n");
5684 break;
5685
5686 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005687 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005688 dev_info_ratelimited(hdev->dev,
5689			"Thermal envelope is safe, back to optimal clock\n");
5690 break;
5691
5692 default:
5693 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5694 event_type);
5695 break;
5696 }
5697}
5698
5699static void gaudi_handle_eqe(struct hl_device *hdev,
5700 struct hl_eq_entry *eq_entry)
5701{
5702 struct gaudi_device *gaudi = hdev->asic_specific;
5703 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5704 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5705 >> EQ_CTL_EVENT_TYPE_SHIFT);
5706 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03005707 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005708
5709 gaudi->events_stat[event_type]++;
5710 gaudi->events_stat_aggregate[event_type]++;
5711
5712 switch (event_type) {
5713 case GAUDI_EVENT_PCIE_CORE_DERR:
5714 case GAUDI_EVENT_PCIE_IF_DERR:
5715 case GAUDI_EVENT_PCIE_PHY_DERR:
5716 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5717 case GAUDI_EVENT_MME0_ACC_DERR:
5718 case GAUDI_EVENT_MME0_SBAB_DERR:
5719 case GAUDI_EVENT_MME1_ACC_DERR:
5720 case GAUDI_EVENT_MME1_SBAB_DERR:
5721 case GAUDI_EVENT_MME2_ACC_DERR:
5722 case GAUDI_EVENT_MME2_SBAB_DERR:
5723 case GAUDI_EVENT_MME3_ACC_DERR:
5724 case GAUDI_EVENT_MME3_SBAB_DERR:
5725 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5726 fallthrough;
5727 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5728 case GAUDI_EVENT_PSOC_MEM_DERR:
5729 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5730 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5731 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005732 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5733 case GAUDI_EVENT_MMU_DERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005734 gaudi_print_irq_info(hdev, event_type, true);
5735 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5736 if (hdev->hard_reset_on_fw_events)
5737 hl_device_reset(hdev, true, false);
5738 break;
5739
5740 case GAUDI_EVENT_GIC500:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005741 case GAUDI_EVENT_AXI_ECC:
5742 case GAUDI_EVENT_L2_RAM_ECC:
5743 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5744 gaudi_print_irq_info(hdev, event_type, false);
5745 if (hdev->hard_reset_on_fw_events)
5746 hl_device_reset(hdev, true, false);
5747 break;
5748
5749 case GAUDI_EVENT_HBM0_SPI_0:
5750 case GAUDI_EVENT_HBM1_SPI_0:
5751 case GAUDI_EVENT_HBM2_SPI_0:
5752 case GAUDI_EVENT_HBM3_SPI_0:
5753 gaudi_print_irq_info(hdev, event_type, false);
5754 gaudi_hbm_read_interrupts(hdev,
5755 gaudi_hbm_event_to_dev(event_type));
5756 if (hdev->hard_reset_on_fw_events)
5757 hl_device_reset(hdev, true, false);
5758 break;
5759
5760 case GAUDI_EVENT_HBM0_SPI_1:
5761 case GAUDI_EVENT_HBM1_SPI_1:
5762 case GAUDI_EVENT_HBM2_SPI_1:
5763 case GAUDI_EVENT_HBM3_SPI_1:
5764 gaudi_print_irq_info(hdev, event_type, false);
5765 gaudi_hbm_read_interrupts(hdev,
5766 gaudi_hbm_event_to_dev(event_type));
5767 break;
5768
5769 case GAUDI_EVENT_TPC0_DEC:
5770 case GAUDI_EVENT_TPC1_DEC:
5771 case GAUDI_EVENT_TPC2_DEC:
5772 case GAUDI_EVENT_TPC3_DEC:
5773 case GAUDI_EVENT_TPC4_DEC:
5774 case GAUDI_EVENT_TPC5_DEC:
5775 case GAUDI_EVENT_TPC6_DEC:
5776 case GAUDI_EVENT_TPC7_DEC:
5777 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005778 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005779 tpc_dec_event_to_tpc_id(event_type),
5780 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03005781 if (reset_required) {
5782 dev_err(hdev->dev, "hard reset required due to %s\n",
5783 gaudi_irq_map_table[event_type].name);
5784
5785 if (hdev->hard_reset_on_fw_events)
5786 hl_device_reset(hdev, true, false);
5787 } else {
5788 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005789 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005790 break;
5791
5792 case GAUDI_EVENT_TPC0_KRN_ERR:
5793 case GAUDI_EVENT_TPC1_KRN_ERR:
5794 case GAUDI_EVENT_TPC2_KRN_ERR:
5795 case GAUDI_EVENT_TPC3_KRN_ERR:
5796 case GAUDI_EVENT_TPC4_KRN_ERR:
5797 case GAUDI_EVENT_TPC5_KRN_ERR:
5798 case GAUDI_EVENT_TPC6_KRN_ERR:
5799 case GAUDI_EVENT_TPC7_KRN_ERR:
5800 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005801 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005802 tpc_krn_event_to_tpc_id(event_type),
5803 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03005804 if (reset_required) {
5805 dev_err(hdev->dev, "hard reset required due to %s\n",
5806 gaudi_irq_map_table[event_type].name);
5807
5808 if (hdev->hard_reset_on_fw_events)
5809 hl_device_reset(hdev, true, false);
5810 } else {
5811 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005812 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005813 break;
5814
5815 case GAUDI_EVENT_PCIE_CORE_SERR:
5816 case GAUDI_EVENT_PCIE_IF_SERR:
5817 case GAUDI_EVENT_PCIE_PHY_SERR:
5818 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5819 case GAUDI_EVENT_MME0_ACC_SERR:
5820 case GAUDI_EVENT_MME0_SBAB_SERR:
5821 case GAUDI_EVENT_MME1_ACC_SERR:
5822 case GAUDI_EVENT_MME1_SBAB_SERR:
5823 case GAUDI_EVENT_MME2_ACC_SERR:
5824 case GAUDI_EVENT_MME2_SBAB_SERR:
5825 case GAUDI_EVENT_MME3_ACC_SERR:
5826 case GAUDI_EVENT_MME3_SBAB_SERR:
5827 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5828 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5829 case GAUDI_EVENT_PSOC_MEM_SERR:
5830 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5831 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5832 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5833 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5834 fallthrough;
5835 case GAUDI_EVENT_MMU_SERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005836 gaudi_print_irq_info(hdev, event_type, true);
5837 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5838 hl_fw_unmask_irq(hdev, event_type);
5839 break;
5840
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005841 case GAUDI_EVENT_PCIE_DEC:
5842 case GAUDI_EVENT_MME0_WBC_RSP:
5843 case GAUDI_EVENT_MME0_SBAB0_RSP:
5844 case GAUDI_EVENT_MME1_WBC_RSP:
5845 case GAUDI_EVENT_MME1_SBAB0_RSP:
5846 case GAUDI_EVENT_MME2_WBC_RSP:
5847 case GAUDI_EVENT_MME2_SBAB0_RSP:
5848 case GAUDI_EVENT_MME3_WBC_RSP:
5849 case GAUDI_EVENT_MME3_SBAB0_RSP:
5850 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5851 case GAUDI_EVENT_PSOC_AXI_DEC:
5852 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5853 case GAUDI_EVENT_MMU_PAGE_FAULT:
5854 case GAUDI_EVENT_MMU_WR_PERM:
5855 case GAUDI_EVENT_RAZWI_OR_ADC:
5856 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5857 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5858 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5859 fallthrough;
5860 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5861 gaudi_print_irq_info(hdev, event_type, true);
5862 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005863 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005864 break;
5865
5866 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5867 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005868 if (hdev->hard_reset_on_fw_events)
5869 hl_device_reset(hdev, true, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005870 break;
5871
5872 case GAUDI_EVENT_TPC0_BMON_SPMU:
5873 case GAUDI_EVENT_TPC1_BMON_SPMU:
5874 case GAUDI_EVENT_TPC2_BMON_SPMU:
5875 case GAUDI_EVENT_TPC3_BMON_SPMU:
5876 case GAUDI_EVENT_TPC4_BMON_SPMU:
5877 case GAUDI_EVENT_TPC5_BMON_SPMU:
5878 case GAUDI_EVENT_TPC6_BMON_SPMU:
5879 case GAUDI_EVENT_TPC7_BMON_SPMU:
5880 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5881 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005882 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005883 break;
5884
5885 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5886 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005887 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005888 break;
5889
5890 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5891 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5892 dev_err(hdev->dev,
5893 "Received high temp H/W interrupt %d (cause %d)\n",
5894 event_type, cause);
5895 break;
5896
5897 default:
5898 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5899 event_type);
5900 break;
5901 }
5902}
5903
5904static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5905 u32 *size)
5906{
5907 struct gaudi_device *gaudi = hdev->asic_specific;
5908
5909 if (aggregate) {
5910 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5911 return gaudi->events_stat_aggregate;
5912 }
5913
5914 *size = (u32) sizeof(gaudi->events_stat);
5915 return gaudi->events_stat;
5916}
5917
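/*
 * Full MMU cache invalidation (L0 + L1): bump the invalidation producer
 * index, then poll STLB_INV_PS until it drops to zero. A timeout here
 * triggers a device reset.
 */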
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005918static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005919 u32 flags)
5920{
5921 struct gaudi_device *gaudi = hdev->asic_specific;
5922 u32 status, timeout_usec;
5923 int rc;
5924
5925 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5926 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005927 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005928
5929 if (hdev->pldm)
5930 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5931 else
5932 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5933
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005934 mutex_lock(&hdev->mmu_cache_lock);
5935
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005936 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03005937 WREG32(mmSTLB_INV_PS, 3);
5938 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005939 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005940
5941 rc = hl_poll_timeout(
5942 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005943 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005944 status,
5945 !status,
5946 1000,
5947 timeout_usec);
5948
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005949 WREG32(mmSTLB_INV_SET, 0);
5950
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005951 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005952
5953 if (rc) {
5954 dev_err_ratelimited(hdev->dev,
5955 "MMU cache invalidation timeout\n");
5956 hl_device_reset(hdev, true, false);
5957 }
5958
5959 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005960}
5961
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005962static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005963 bool is_hard, u32 asid, u64 va, u64 size)
5964{
5965 struct gaudi_device *gaudi = hdev->asic_specific;
5966 u32 status, timeout_usec;
5967 u32 inv_data;
5968 u32 pi;
5969 int rc;
5970
5971 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5972 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005973 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005974
5975 mutex_lock(&hdev->mmu_cache_lock);
5976
5977 if (hdev->pldm)
5978 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5979 else
5980 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5981
5982 /*
5983 * TODO: currently invalidate entire L0 & L1 as in regular hard
5984 * invalidation. Need to apply invalidation of specific cache
5985 * lines with mask of ASID & VA & size.
5986 * Note that L1 with be flushed entirely in any case.
5987	 * Note that L1 will be flushed entirely in any case.
5988
5989 /* L0 & L1 invalidation */
5990 inv_data = RREG32(mmSTLB_CACHE_INV);
5991 /* PI is 8 bit */
5992 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5993 WREG32(mmSTLB_CACHE_INV,
5994 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5995
5996 rc = hl_poll_timeout(
5997 hdev,
5998 mmSTLB_INV_CONSUMER_INDEX,
5999 status,
6000 status == pi,
6001 1000,
6002 timeout_usec);
6003
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006004 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03006005
6006 if (rc) {
6007 dev_err_ratelimited(hdev->dev,
6008 "MMU cache invalidation timeout\n");
6009 hl_device_reset(hdev, true, false);
6010 }
6011
6012 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006013}
6014
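/*
 * Program the hop-0 page-table physical address for a given ASID, kick the
 * MMU by setting the busy bit and wait for the hardware to clear it.
 */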
6015static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6016 u32 asid, u64 phys_addr)
6017{
6018 u32 status, timeout_usec;
6019 int rc;
6020
6021 if (hdev->pldm)
6022 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6023 else
6024 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6025
6026 WREG32(MMU_ASID, asid);
6027 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6028 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6029 WREG32(MMU_BUSY, 0x80000000);
6030
6031 rc = hl_poll_timeout(
6032 hdev,
6033 MMU_BUSY,
6034 status,
6035 !(status & 0x80000000),
6036 1000,
6037 timeout_usec);
6038
6039 if (rc) {
6040 dev_err(hdev->dev,
6041 "Timeout during MMU hop0 config of asid %d\n", asid);
6042 return rc;
6043 }
6044
6045 return 0;
6046}
6047
6048static int gaudi_send_heartbeat(struct hl_device *hdev)
6049{
6050 struct gaudi_device *gaudi = hdev->asic_specific;
6051
6052 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6053 return 0;
6054
6055 return hl_fw_send_heartbeat(hdev);
6056}
6057
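/*
 * Fetch the CPUCP info from the firmware, fall back to the default card
 * name if none was reported, and derive the default maximum power from the
 * reported card type (PCI vs. PMC).
 */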
Oded Gabbay2f553422020-08-15 16:28:10 +03006058static int gaudi_cpucp_info_get(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006059{
6060 struct gaudi_device *gaudi = hdev->asic_specific;
6061 struct asic_fixed_properties *prop = &hdev->asic_prop;
6062 int rc;
6063
6064 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6065 return 0;
6066
Oded Gabbay2f553422020-08-15 16:28:10 +03006067 rc = hl_fw_cpucp_info_get(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006068 if (rc)
6069 return rc;
6070
Oded Gabbay2f553422020-08-15 16:28:10 +03006071 if (!strlen(prop->cpucp_info.card_name))
6072 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006073 CARD_NAME_MAX_LEN);
6074
Oded Gabbay2f553422020-08-15 16:28:10 +03006075 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
Oded Gabbay58361aa2020-08-08 23:34:47 +03006076
Oded Gabbay2f553422020-08-15 16:28:10 +03006077 if (hdev->card_type == cpucp_card_type_pci)
Oded Gabbay58361aa2020-08-08 23:34:47 +03006078 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
Oded Gabbay2f553422020-08-15 16:28:10 +03006079 else if (hdev->card_type == cpucp_card_type_pmc)
Oded Gabbay58361aa2020-08-08 23:34:47 +03006080 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6081
6082 hdev->max_power = prop->max_power_default;
6083
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006084 return 0;
6085}
6086
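/*
 * Report per-engine idle state for all DMA, TPC and MME engines. For every
 * busy engine the corresponding bit is set in the caller-supplied mask, and
 * when a seq_file is provided a human-readable table is emitted as well.
 * MME 1 and 3 are slaves, so only their ARCH_STATUS is consulted.
 */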
farah kassabrid90416c2020-08-12 17:20:13 +03006087static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006088 struct seq_file *s)
6089{
6090 struct gaudi_device *gaudi = hdev->asic_specific;
6091 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6092 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6093 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6094 bool is_idle = true, is_eng_idle, is_slave;
6095 u64 offset;
6096 int i, dma_id;
6097
6098 mutex_lock(&gaudi->clk_gate_mutex);
6099
6100 hdev->asic_funcs->disable_clock_gating(hdev);
6101
6102 if (s)
6103 seq_puts(s,
6104 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6105 "--- ------- ------------ ---------- -------------\n");
6106
6107 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6108 dma_id = gaudi_dma_assignment[i];
6109 offset = dma_id * DMA_QMAN_OFFSET;
6110
6111 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6112 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6113 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6114 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6115 IS_DMA_IDLE(dma_core_sts0);
6116 is_idle &= is_eng_idle;
6117
6118 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03006119 *mask |= ((u64) !is_eng_idle) <<
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006120 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6121 if (s)
6122 seq_printf(s, fmt, dma_id,
6123 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6124 qm_cgm_sts, dma_core_sts0);
6125 }
6126
6127 if (s)
6128 seq_puts(s,
6129 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6130 "--- ------- ------------ ---------- ----------\n");
6131
6132 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6133 offset = i * TPC_QMAN_OFFSET;
6134 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6135 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6136 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6137 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6138 IS_TPC_IDLE(tpc_cfg_sts);
6139 is_idle &= is_eng_idle;
6140
6141 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03006142 *mask |= ((u64) !is_eng_idle) <<
6143 (GAUDI_ENGINE_ID_TPC_0 + i);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006144 if (s)
6145 seq_printf(s, fmt, i,
6146 is_eng_idle ? "Y" : "N",
6147 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6148 }
6149
6150 if (s)
6151 seq_puts(s,
6152 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6153 "--- ------- ------------ ---------- -----------\n");
6154
6155 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6156 offset = i * MME_QMAN_OFFSET;
6157 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6158 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6159
6160 /* MME 1 & 3 are slaves, no need to check their QMANs */
6161 is_slave = i % 2;
6162 if (!is_slave) {
6163 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6164 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6165 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6166 }
6167
6168 is_idle &= is_eng_idle;
6169
6170 if (mask)
Oded Gabbayf7639462020-08-29 11:24:03 +03006171 *mask |= ((u64) !is_eng_idle) <<
6172 (GAUDI_ENGINE_ID_MME_0 + i);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006173 if (s) {
6174 if (!is_slave)
6175 seq_printf(s, fmt, i,
6176 is_eng_idle ? "Y" : "N",
6177 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6178 else
6179 seq_printf(s, mme_slave_fmt, i,
6180 is_eng_idle ? "Y" : "N", "-",
6181 "-", mme_arch_sts);
6182 }
6183 }
6184
6185 if (s)
6186 seq_puts(s, "\n");
6187
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006188 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006189
6190 mutex_unlock(&gaudi->clk_gate_mutex);
6191
6192 return is_idle;
6193}
6194
6195static void gaudi_hw_queues_lock(struct hl_device *hdev)
6196 __acquires(&gaudi->hw_queues_lock)
6197{
6198 struct gaudi_device *gaudi = hdev->asic_specific;
6199
6200 spin_lock(&gaudi->hw_queues_lock);
6201}
6202
6203static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6204 __releases(&gaudi->hw_queues_lock)
6205{
6206 struct gaudi_device *gaudi = hdev->asic_specific;
6207
6208 spin_unlock(&gaudi->hw_queues_lock);
6209}
6210
6211static u32 gaudi_get_pci_id(struct hl_device *hdev)
6212{
6213 return hdev->pdev->device;
6214}
6215
6216static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6217 size_t max_size)
6218{
6219 struct gaudi_device *gaudi = hdev->asic_specific;
6220
6221 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6222 return 0;
6223
6224 return hl_fw_get_eeprom_data(hdev, data, max_size);
6225}
6226
6227/*
6228 * this function should be used only during initialization and/or after reset,
6229 * when there are no active users.
6230 */
6231static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6232 u32 tpc_id)
6233{
6234 struct gaudi_device *gaudi = hdev->asic_specific;
6235 u64 kernel_timeout;
6236 u32 status, offset;
6237 int rc;
6238
6239 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6240
6241 if (hdev->pldm)
6242 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6243 else
6244 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6245
6246 mutex_lock(&gaudi->clk_gate_mutex);
6247
6248 hdev->asic_funcs->disable_clock_gating(hdev);
6249
6250 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6251 lower_32_bits(tpc_kernel));
6252 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6253 upper_32_bits(tpc_kernel));
6254
6255 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6256 lower_32_bits(tpc_kernel));
6257 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6258 upper_32_bits(tpc_kernel));
6259 /* set a valid LUT pointer, content is of no significance */
6260 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6261 lower_32_bits(tpc_kernel));
6262 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6263 upper_32_bits(tpc_kernel));
6264
6265 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6266 lower_32_bits(CFG_BASE +
6267 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6268
6269 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6270 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6271 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6272 /* wait a bit for the engine to start executing */
6273 usleep_range(1000, 1500);
6274
6275 /* wait until engine has finished executing */
6276 rc = hl_poll_timeout(
6277 hdev,
6278 mmTPC0_CFG_STATUS + offset,
6279 status,
6280 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6281 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6282 1000,
6283 kernel_timeout);
6284
6285 if (rc) {
6286 dev_err(hdev->dev,
6287 "Timeout while waiting for TPC%d icache prefetch\n",
6288 tpc_id);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006289 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006290 mutex_unlock(&gaudi->clk_gate_mutex);
6291 return -EIO;
6292 }
6293
6294 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6295 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6296
6297 /* wait a bit for the engine to start executing */
6298 usleep_range(1000, 1500);
6299
6300 /* wait until engine has finished executing */
6301 rc = hl_poll_timeout(
6302 hdev,
6303 mmTPC0_CFG_STATUS + offset,
6304 status,
6305 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6306 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6307 1000,
6308 kernel_timeout);
6309
Oded Gabbay31ac1f12020-08-12 11:28:13 +03006310 if (rc) {
6311 dev_err(hdev->dev,
6312 "Timeout while waiting for TPC%d vector pipe\n",
6313 tpc_id);
6314 hdev->asic_funcs->set_clock_gating(hdev);
6315 mutex_unlock(&gaudi->clk_gate_mutex);
6316 return -EIO;
6317 }
6318
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006319 rc = hl_poll_timeout(
6320 hdev,
6321 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6322 status,
6323 (status == 0),
6324 1000,
6325 kernel_timeout);
6326
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006327 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006328 mutex_unlock(&gaudi->clk_gate_mutex);
6329
6330 if (rc) {
6331 dev_err(hdev->dev,
6332 "Timeout while waiting for TPC%d kernel to execute\n",
6333 tpc_id);
6334 return -EIO;
6335 }
6336
6337 return 0;
6338}
6339
6340static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6341{
6342 return RREG32(mmHW_STATE);
6343}
6344
kernel test robotbb34bf72020-07-29 08:03:13 +08006345static int gaudi_ctx_init(struct hl_ctx *ctx)
Ofir Bittona04b7cd2020-07-13 13:36:55 +03006346{
6347 return 0;
6348}
6349
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006350static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6351{
6352 return gaudi_cq_assignment[cq_idx];
6353}
6354
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006355static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6356{
6357 return sizeof(struct packet_msg_short) +
6358 sizeof(struct packet_msg_prot) * 2;
6359}
6360
6361static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6362{
6363 return sizeof(struct packet_msg_short) * 4 +
6364 sizeof(struct packet_fence) +
6365 sizeof(struct packet_msg_prot) * 2;
6366}
6367
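/*
 * Build the signal packet: a single MSG_SHORT that writes to the selected
 * sync object in ADD mode with a value of 1 (i.e. atomically increments the
 * SOB), addressed relative to the W_S sync manager SOB base.
 */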
6368static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6369{
6370 struct hl_cb *cb = (struct hl_cb *) data;
6371 struct packet_msg_short *pkt;
6372 u32 value, ctl;
6373
6374 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6375 memset(pkt, 0, sizeof(*pkt));
6376
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006377 /* Inc by 1, Mode ADD */
6378 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6379 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006380
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006381 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6382 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6383 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6384 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6385 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6386 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6387 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006388
6389 pkt->value = cpu_to_le32(value);
6390 pkt->ctl = cpu_to_le32(ctl);
6391}
6392
6393static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6394 u16 addr)
6395{
6396 u32 ctl, pkt_size = sizeof(*pkt);
6397
6398 memset(pkt, 0, pkt_size);
6399
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006400 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6401 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6402 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6403 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6404 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6405 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006406
6407 pkt->value = cpu_to_le32(value);
6408 pkt->ctl = cpu_to_le32(ctl);
6409
6410 return pkt_size;
6411}
6412
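/*
 * Arm a sync manager monitor. Monitors watch a group of 8 sync objects, so
 * the group id is sob_id / 8 and the mask has every bit set except the one
 * for sob_id (presumably so only that SOB is taken into account). Mode 0
 * arms the monitor for "greater than or equal" to sob_val.
 */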
static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
					u16 sob_val, u16 addr)
{
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u8 mask = ~(1 << (sob_id & 0x7));

	memset(pkt, 0, pkt_size);

	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
				0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

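/*
 * Emit a FENCE packet on fence counter 2: wait until the counter reaches a
 * target value of 1, then decrement it by 1.
 */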
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

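/*
 * Build a wait CB: four MSG_SHORT packets configure a W_S monitor (payload
 * address low/high, payload data and ARM) so that when the sync object
 * reaches the target value the monitor writes to the queue's CP FENCE2
 * register, and a final FENCE packet stalls the CP until that write lands.
 */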
static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
			u16 sob_val, u16 mon_id, u32 q_idx)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	void *buf = (void *) (uintptr_t) cb->kernel_address;
	u64 monitor_base, fence_addr = 0;
	u32 size = 0;
	u16 msg_addr_offset;

	switch (q_idx) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	default:
		/* queue index should be valid here */
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				q_idx);
		return;
	}

	fence_addr += CFG_BASE;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
					msg_addr_offset);

	/* Fence packet */
	size += gaudi_add_fence_pkt(buf + size);
}

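/*
 * Clear the sync object's value in the W_S sync manager and re-initialize its
 * reference count so it can be reused.
 */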
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
			hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
			0);

	kref_init(&hw_sob->kref);
}

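/*
 * The firmware reports a POWER9 host by leaving a magic value in a non-reset
 * scratch register; in that case use a 64-bit DMA mask, otherwise fall back
 * to 48 bits.
 */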
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
					HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

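/*
 * Read the device timestamp counter: upper 32 bits first, then the lower 32
 * bits, combined into a single 64-bit value.
 */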
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

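/* ASIC function table exposed to the habanalabs common code */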
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.get_hw_state = gaudi_get_hw_state,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}