// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */
7
8#include "gaudiP.h"
Greg Kroah-Hartman7b16a152020-07-28 19:18:51 +02009#include "../include/hw_ip/mmu/mmu_general.h"
10#include "../include/hw_ip/mmu/mmu_v1_1.h"
11#include "../include/gaudi/gaudi_masks.h"
12#include "../include/gaudi/gaudi_fw_if.h"
13#include "../include/gaudi/gaudi_reg_map.h"
14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
Oded Gabbayac0ae6a2020-05-11 10:29:27 +030015
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/genalloc.h>
21#include <linux/io-64-nonatomic-lo-hi.h>
22#include <linux/iommu.h>
23#include <linux/seq_file.h>
24
/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1,5 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */
63
64#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
65#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
66#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
67
68#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
69
70#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
71#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
72#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
73#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
74
75#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
76#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +030077#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
78#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
79#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
80#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
Oded Gabbay788cacf2020-07-07 17:30:13 +030082#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +030083
84#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
85
86#define GAUDI_MAX_STRING_LEN 20
87
88#define GAUDI_CB_POOL_CB_CNT 512
89#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
90
91#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
92
93#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
94
95#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
96
97#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
98
Oded Gabbay647e8352020-06-07 11:26:48 +030099#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300100
Oded Gabbaye38bfd32020-07-03 20:46:12 +0300101#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
102 BIT(GAUDI_ENGINE_ID_MME_0) |\
103 BIT(GAUDI_ENGINE_ID_MME_2) |\
104 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300106static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
107 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
108 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
109 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
110 "gaudi cpu eq"
111};
112
113static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
Oded Gabbaye38bfd32020-07-03 20:46:12 +0300114 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116 [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300122};
123
124static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
125 [0] = GAUDI_QUEUE_ID_DMA_0_0,
126 [1] = GAUDI_QUEUE_ID_DMA_0_1,
127 [2] = GAUDI_QUEUE_ID_DMA_0_2,
128 [3] = GAUDI_QUEUE_ID_DMA_0_3,
129 [4] = GAUDI_QUEUE_ID_DMA_1_0,
130 [5] = GAUDI_QUEUE_ID_DMA_1_1,
131 [6] = GAUDI_QUEUE_ID_DMA_1_2,
132 [7] = GAUDI_QUEUE_ID_DMA_1_3,
133 [8] = GAUDI_QUEUE_ID_DMA_5_0,
134 [9] = GAUDI_QUEUE_ID_DMA_5_1,
135 [10] = GAUDI_QUEUE_ID_DMA_5_2,
136 [11] = GAUDI_QUEUE_ID_DMA_5_3
137};
138
139static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
141 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
142 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
143 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
144 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
145 [PACKET_REPEAT] = sizeof(struct packet_repeat),
146 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
147 [PACKET_FENCE] = sizeof(struct packet_fence),
148 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
149 [PACKET_NOP] = sizeof(struct packet_nop),
150 [PACKET_STOP] = sizeof(struct packet_stop),
151 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
152 [PACKET_WAIT] = sizeof(struct packet_wait),
153 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
154};
155
Ofir Bittonbc75be22020-07-30 14:56:38 +0300156static inline bool validate_packet_id(enum packet_id id)
157{
158 switch (id) {
159 case PACKET_WREG_32:
160 case PACKET_WREG_BULK:
161 case PACKET_MSG_LONG:
162 case PACKET_MSG_SHORT:
163 case PACKET_CP_DMA:
164 case PACKET_REPEAT:
165 case PACKET_MSG_PROT:
166 case PACKET_FENCE:
167 case PACKET_LIN_DMA:
168 case PACKET_NOP:
169 case PACKET_STOP:
170 case PACKET_ARB_POINT:
171 case PACKET_WAIT:
172 case PACKET_LOAD_AND_EXE:
173 return true;
174 default:
175 return false;
176 }
177}
178
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300179static const char * const
180gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181 "tpc_address_exceed_slm",
182 "tpc_div_by_0",
183 "tpc_spu_mac_overflow",
184 "tpc_spu_addsub_overflow",
185 "tpc_spu_abs_overflow",
186 "tpc_spu_fp_dst_nan_inf",
187 "tpc_spu_fp_dst_denorm",
188 "tpc_vpu_mac_overflow",
189 "tpc_vpu_addsub_overflow",
190 "tpc_vpu_abs_overflow",
191 "tpc_vpu_fp_dst_nan_inf",
192 "tpc_vpu_fp_dst_denorm",
193 "tpc_assertions",
194 "tpc_illegal_instruction",
195 "tpc_pc_wrap_around",
196 "tpc_qm_sw_err",
197 "tpc_hbw_rresp_err",
198 "tpc_hbw_bresp_err",
199 "tpc_lbw_rresp_err",
200 "tpc_lbw_bresp_err"
201};
202
203static const char * const
204gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205 "PQ AXI HBW error",
206 "CQ AXI HBW error",
207 "CP AXI HBW error",
208 "CP error due to undefined OPCODE",
209 "CP encountered STOP OPCODE",
210 "CP AXI LBW error",
211 "CP WRREG32 or WRBULK returned error",
212 "N/A",
213 "FENCE 0 inc over max value and clipped",
214 "FENCE 1 inc over max value and clipped",
215 "FENCE 2 inc over max value and clipped",
216 "FENCE 3 inc over max value and clipped",
217 "FENCE 0 dec under min value and clipped",
218 "FENCE 1 dec under min value and clipped",
219 "FENCE 2 dec under min value and clipped",
220 "FENCE 3 dec under min value and clipped"
221};
222
223static const char * const
224gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225 "Choice push while full error",
226 "Choice Q watchdog error",
227 "MSG AXI LBW returned with error"
228};
229
230static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
316 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
317 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
318 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
319 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
320 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
321 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
322 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
323 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
324 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
325 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
326 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
327 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
328 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
329 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
330 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
331 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
332 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
333 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
334 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
335 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
336 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
337 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
338 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
339 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
340 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
341 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
342 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
343 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
344};
345
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +0300346struct ecc_info_extract_params {
347 u64 block_address;
348 u32 num_memories;
349 bool derr;
350 bool disable_clock_gating;
351};
352
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300353static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
354 u64 phys_addr);
355static int gaudi_send_job_on_qman0(struct hl_device *hdev,
356 struct hl_cs_job *job);
357static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
358 u32 size, u64 val);
359static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
360 u32 tpc_id);
361static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
362static int gaudi_armcp_info_get(struct hl_device *hdev);
363static void gaudi_disable_clock_gating(struct hl_device *hdev);
364static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
365
366static int gaudi_get_fixed_properties(struct hl_device *hdev)
367{
368 struct asic_fixed_properties *prop = &hdev->asic_prop;
Ofir Bitton843839b2020-07-19 11:08:09 +0300369 u32 num_sync_stream_queues = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300370 int i;
371
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300372 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
373 prop->hw_queues_props = kcalloc(prop->max_queues,
374 sizeof(struct hw_queue_properties),
375 GFP_KERNEL);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300376
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300377 if (!prop->hw_queues_props)
378 return -ENOMEM;
379
380 for (i = 0 ; i < prop->max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300381 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
382 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
383 prop->hw_queues_props[i].driver_only = 0;
384 prop->hw_queues_props[i].requires_kernel_cb = 1;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300385 prop->hw_queues_props[i].supports_sync_stream = 1;
Ofir Bitton843839b2020-07-19 11:08:09 +0300386 num_sync_stream_queues++;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300387 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
388 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
389 prop->hw_queues_props[i].driver_only = 1;
390 prop->hw_queues_props[i].requires_kernel_cb = 0;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300391 prop->hw_queues_props[i].supports_sync_stream = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300392 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
393 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
394 prop->hw_queues_props[i].driver_only = 0;
395 prop->hw_queues_props[i].requires_kernel_cb = 0;
396 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
397 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
398 prop->hw_queues_props[i].driver_only = 0;
399 prop->hw_queues_props[i].requires_kernel_cb = 0;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300400 prop->hw_queues_props[i].supports_sync_stream = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300401 }
402 }
403
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300404 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300405 prop->sync_stream_first_sob = 0;
406 prop->sync_stream_first_mon = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300407 prop->dram_base_address = DRAM_PHYS_BASE;
408 prop->dram_size = GAUDI_HBM_SIZE_32GB;
409 prop->dram_end_address = prop->dram_base_address +
410 prop->dram_size;
411 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
412
413 prop->sram_base_address = SRAM_BASE_ADDR;
414 prop->sram_size = SRAM_SIZE;
415 prop->sram_end_address = prop->sram_base_address +
416 prop->sram_size;
417 prop->sram_user_base_address = prop->sram_base_address +
418 SRAM_USER_BASE_OFFSET;
419
420 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
421 if (hdev->pldm)
422 prop->mmu_pgt_size = 0x800000; /* 8MB */
423 else
424 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
425 prop->mmu_pte_size = HL_PTE_SIZE;
426 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
427 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
428 prop->dram_page_size = PAGE_SIZE_2MB;
429
430 prop->pmmu.hop0_shift = HOP0_SHIFT;
431 prop->pmmu.hop1_shift = HOP1_SHIFT;
432 prop->pmmu.hop2_shift = HOP2_SHIFT;
433 prop->pmmu.hop3_shift = HOP3_SHIFT;
434 prop->pmmu.hop4_shift = HOP4_SHIFT;
435 prop->pmmu.hop0_mask = HOP0_MASK;
436 prop->pmmu.hop1_mask = HOP1_MASK;
437 prop->pmmu.hop2_mask = HOP2_MASK;
438 prop->pmmu.hop3_mask = HOP3_MASK;
439 prop->pmmu.hop4_mask = HOP4_MASK;
440 prop->pmmu.start_addr = VA_HOST_SPACE_START;
441 prop->pmmu.end_addr =
442 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
443 prop->pmmu.page_size = PAGE_SIZE_4KB;
444
445 /* PMMU and HPMMU are the same except of page size */
446 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
447 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
448
449 /* shifts and masks are the same in PMMU and DMMU */
450 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
451 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
452 prop->dmmu.end_addr = VA_HOST_SPACE_END;
453 prop->dmmu.page_size = PAGE_SIZE_2MB;
454
455 prop->cfg_size = CFG_SIZE;
456 prop->max_asid = MAX_ASID;
457 prop->num_of_events = GAUDI_EVENT_SIZE;
458 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
459
Oded Gabbay58361aa2020-08-08 23:34:47 +0300460 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300461
462 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
463 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
464
465 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
466 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
467
468 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
469 CARD_NAME_MAX_LEN);
470
Ofir Bittonc16d45f2020-06-02 12:28:27 +0300471 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
472
Ofir Bitton843839b2020-07-19 11:08:09 +0300473 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
474 num_sync_stream_queues * HL_RSVD_SOBS;
475 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
476 num_sync_stream_queues * HL_RSVD_MONS;
477
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300478 return 0;
479}
480
481static int gaudi_pci_bars_map(struct hl_device *hdev)
482{
483 static const char * const name[] = {"SRAM", "CFG", "HBM"};
484 bool is_wc[3] = {false, false, true};
485 int rc;
486
487 rc = hl_pci_bars_map(hdev, name, is_wc);
488 if (rc)
489 return rc;
490
491 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
492 (CFG_BASE - SPI_FLASH_BASE_ADDR);
493
494 return 0;
495}
496
497static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
498{
499 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300500 struct hl_inbound_pci_region pci_region;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300501 u64 old_addr = addr;
502 int rc;
503
504 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
505 return old_addr;
506
507 /* Inbound Region 2 - Bar 4 - Point to HBM */
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300508 pci_region.mode = PCI_BAR_MATCH_MODE;
509 pci_region.bar = HBM_BAR_ID;
510 pci_region.addr = addr;
511 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300512 if (rc)
513 return U64_MAX;
514
515 if (gaudi) {
516 old_addr = gaudi->hbm_bar_cur_addr;
517 gaudi->hbm_bar_cur_addr = addr;
518 }
519
520 return old_addr;
521}
522
523static int gaudi_init_iatu(struct hl_device *hdev)
524{
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300525 struct hl_inbound_pci_region inbound_region;
526 struct hl_outbound_pci_region outbound_region;
527 int rc;
528
529 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
530 inbound_region.mode = PCI_BAR_MATCH_MODE;
531 inbound_region.bar = SRAM_BAR_ID;
532 inbound_region.addr = SRAM_BASE_ADDR;
533 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
534 if (rc)
535 goto done;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300536
537 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300538 inbound_region.mode = PCI_BAR_MATCH_MODE;
539 inbound_region.bar = CFG_BAR_ID;
540 inbound_region.addr = SPI_FLASH_BASE_ADDR;
541 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300542 if (rc)
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300543 goto done;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300544
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300545 /* Inbound Region 2 - Bar 4 - Point to HBM */
546 inbound_region.mode = PCI_BAR_MATCH_MODE;
547 inbound_region.bar = HBM_BAR_ID;
548 inbound_region.addr = DRAM_PHYS_BASE;
549 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
550 if (rc)
551 goto done;
552
553 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
554
555 /* Outbound Region 0 - Point to Host */
556 outbound_region.addr = HOST_PHYS_BASE;
557 outbound_region.size = HOST_PHYS_SIZE;
558 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
559
560done:
561 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300562}
563
564static int gaudi_early_init(struct hl_device *hdev)
565{
566 struct asic_fixed_properties *prop = &hdev->asic_prop;
567 struct pci_dev *pdev = hdev->pdev;
568 int rc;
569
570 rc = gaudi_get_fixed_properties(hdev);
571 if (rc) {
572 dev_err(hdev->dev, "Failed to get fixed properties\n");
573 return rc;
574 }
575
576 /* Check BAR sizes */
577 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
578 dev_err(hdev->dev,
579 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
580 SRAM_BAR_ID,
581 (unsigned long long) pci_resource_len(pdev,
582 SRAM_BAR_ID),
583 SRAM_BAR_SIZE);
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300584 rc = -ENODEV;
585 goto free_queue_props;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300586 }
587
588 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
589 dev_err(hdev->dev,
590 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
591 CFG_BAR_ID,
592 (unsigned long long) pci_resource_len(pdev,
593 CFG_BAR_ID),
594 CFG_BAR_SIZE);
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300595 rc = -ENODEV;
596 goto free_queue_props;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300597 }
598
599 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
600
601 rc = hl_pci_init(hdev);
602 if (rc)
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300603 goto free_queue_props;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300604
605 return 0;
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300606
607free_queue_props:
608 kfree(hdev->asic_prop.hw_queues_props);
609 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300610}
611
612static int gaudi_early_fini(struct hl_device *hdev)
613{
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300614 kfree(hdev->asic_prop.hw_queues_props);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300615 hl_pci_fini(hdev);
616
617 return 0;
618}
619
620/**
621 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
622 *
623 * @hdev: pointer to hl_device structure
624 *
625 */
626static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
627{
628 struct asic_fixed_properties *prop = &hdev->asic_prop;
Adam Aharone8edded2020-05-26 11:04:30 +0300629 u32 trace_freq = 0;
630 u32 pll_clk = 0;
631 u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
632 u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
633 u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
634 u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
635 u32 od = RREG32(mmPSOC_CPU_PLL_OD);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300636
Adam Aharone8edded2020-05-26 11:04:30 +0300637 if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
638 if (div_sel == DIV_SEL_REF_CLK)
639 trace_freq = PLL_REF_CLK;
640 else
641 trace_freq = PLL_REF_CLK / (div_fctr + 1);
642 } else if (div_sel == DIV_SEL_PLL_CLK ||
643 div_sel == DIV_SEL_DIVIDED_PLL) {
644 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
645 if (div_sel == DIV_SEL_PLL_CLK)
646 trace_freq = pll_clk;
647 else
648 trace_freq = pll_clk / (div_fctr + 1);
649 } else {
650 dev_warn(hdev->dev,
651 "Received invalid div select value: %d", div_sel);
652 }
653
654 prop->psoc_timestamp_frequency = trace_freq;
655 prop->psoc_pci_pll_nr = nr;
656 prop->psoc_pci_pll_nf = nf;
657 prop->psoc_pci_pll_od = od;
658 prop->psoc_pci_pll_div_factor = div_fctr;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300659}
660
661static int _gaudi_init_tpc_mem(struct hl_device *hdev,
662 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
663{
664 struct asic_fixed_properties *prop = &hdev->asic_prop;
665 struct packet_lin_dma *init_tpc_mem_pkt;
666 struct hl_cs_job *job;
667 struct hl_cb *cb;
668 u64 dst_addr;
669 u32 cb_size, ctl;
670 u8 tpc_id;
671 int rc;
672
Ofir Bittona04b7cd2020-07-13 13:36:55 +0300673 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300674 if (!cb)
675 return -EFAULT;
676
677 init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
678 cb->kernel_address;
679 cb_size = sizeof(*init_tpc_mem_pkt);
680 memset(init_tpc_mem_pkt, 0, cb_size);
681
682 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
683
Oded Gabbay65887292020-08-12 11:21:01 +0300684 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
685 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
686 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
687 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300688
689 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
690
691 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
692 dst_addr = (prop->sram_user_base_address &
693 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
694 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
695 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
696
697 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
698 if (!job) {
699 dev_err(hdev->dev, "Failed to allocate a new job\n");
700 rc = -ENOMEM;
701 goto release_cb;
702 }
703
704 job->id = 0;
705 job->user_cb = cb;
706 job->user_cb->cs_cnt++;
707 job->user_cb_size = cb_size;
708 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
709 job->patched_cb = job->user_cb;
710 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
711
712 hl_debugfs_add_job(hdev, job);
713
714 rc = gaudi_send_job_on_qman0(hdev, job);
715
716 if (rc)
717 goto free_job;
718
719 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
720 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
721 if (rc)
722 break;
723 }
724
725free_job:
726 hl_userptr_delete_list(hdev, &job->userptr_list);
727 hl_debugfs_remove_job(hdev, job);
728 kfree(job);
729 cb->cs_cnt--;
730
731release_cb:
732 hl_cb_put(cb);
733 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
734
735 return rc;
736}
737
738/*
739 * gaudi_init_tpc_mem() - Initialize TPC memories.
740 * @hdev: Pointer to hl_device structure.
741 *
742 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
743 *
744 * Return: 0 for success, negative value for error.
745 */
746static int gaudi_init_tpc_mem(struct hl_device *hdev)
747{
748 const struct firmware *fw;
749 size_t fw_size;
750 void *cpu_addr;
751 dma_addr_t dma_handle;
752 int rc;
753
754 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
755 if (rc) {
756 dev_err(hdev->dev, "Firmware file %s is not found!\n",
757 GAUDI_TPC_FW_FILE);
758 goto out;
759 }
760
761 fw_size = fw->size;
762 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
763 &dma_handle, GFP_KERNEL | __GFP_ZERO);
764 if (!cpu_addr) {
765 dev_err(hdev->dev,
766 "Failed to allocate %zu of dma memory for TPC kernel\n",
767 fw_size);
768 rc = -ENOMEM;
769 goto out;
770 }
771
772 memcpy(cpu_addr, fw->data, fw_size);
773
774 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
775
776 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
777 dma_handle);
778
779out:
780 release_firmware(fw);
781 return rc;
782}
783
784static int gaudi_late_init(struct hl_device *hdev)
785{
786 struct gaudi_device *gaudi = hdev->asic_specific;
787 int rc;
788
789 rc = gaudi->armcp_info_get(hdev);
790 if (rc) {
791 dev_err(hdev->dev, "Failed to get armcp info\n");
792 return rc;
793 }
794
795 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
796 if (rc) {
797 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
798 return rc;
799 }
800
801 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
802
803 gaudi_fetch_psoc_frequency(hdev);
804
805 rc = gaudi_mmu_clear_pgt_range(hdev);
806 if (rc) {
807 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
808 goto disable_pci_access;
809 }
810
811 rc = gaudi_init_tpc_mem(hdev);
812 if (rc) {
813 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
814 goto disable_pci_access;
815 }
816
817 return 0;
818
819disable_pci_access:
820 hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
821
822 return rc;
823}
824
825static void gaudi_late_fini(struct hl_device *hdev)
826{
827 const struct hwmon_channel_info **channel_info_arr;
828 int i = 0;
829
830 if (!hdev->hl_chip_info->info)
831 return;
832
833 channel_info_arr = hdev->hl_chip_info->info;
834
835 while (channel_info_arr[i]) {
836 kfree(channel_info_arr[i]->config);
837 kfree(channel_info_arr[i]);
838 i++;
839 }
840
841 kfree(channel_info_arr);
842
843 hdev->hl_chip_info->info = NULL;
844}
845
846static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
847{
848 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
849 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
850 int i, j, rc = 0;
851
852 /*
853 * The device CPU works with 40-bits addresses, while bit 39 must be set
854 * to '1' when accessing the host.
855 * Bits 49:39 of the full host address are saved for a later
856 * configuration of the HW to perform extension to 50 bits.
857 * Because there is a single HW register that holds the extension bits,
858 * these bits must be identical in all allocated range.
859 */
860
861 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
862 virt_addr_arr[i] =
863 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
864 HL_CPU_ACCESSIBLE_MEM_SIZE,
865 &dma_addr_arr[i],
866 GFP_KERNEL | __GFP_ZERO);
867 if (!virt_addr_arr[i]) {
868 rc = -ENOMEM;
869 goto free_dma_mem_arr;
870 }
871
872 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
873 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
874 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
875 break;
876 }
877
878 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
879 dev_err(hdev->dev,
880 "MSB of CPU accessible DMA memory are not identical in all range\n");
881 rc = -EFAULT;
882 goto free_dma_mem_arr;
883 }
884
885 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
886 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
887 hdev->cpu_pci_msb_addr =
888 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
889
890 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
891
892free_dma_mem_arr:
893 for (j = 0 ; j < i ; j++)
894 hdev->asic_funcs->asic_dma_free_coherent(hdev,
895 HL_CPU_ACCESSIBLE_MEM_SIZE,
896 virt_addr_arr[j],
897 dma_addr_arr[j]);
898
899 return rc;
900}
901
902static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
903{
904 struct gaudi_device *gaudi = hdev->asic_specific;
905 struct gaudi_internal_qman_info *q;
906 u32 i;
907
908 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
909 q = &gaudi->internal_qmans[i];
910 if (!q->pq_kernel_addr)
911 continue;
912 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
913 q->pq_kernel_addr,
914 q->pq_dma_addr);
915 }
916}
917
918static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
919{
920 struct gaudi_device *gaudi = hdev->asic_specific;
921 struct gaudi_internal_qman_info *q;
922 int rc, i;
923
924 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
925 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
926 continue;
927
928 q = &gaudi->internal_qmans[i];
929
930 switch (i) {
931 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
932 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
933 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
934 break;
935 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
936 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
937 break;
938 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
939 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
940 break;
941 default:
942 dev_err(hdev->dev, "Bad internal queue index %d", i);
943 rc = -EINVAL;
944 goto free_internal_qmans_pq_mem;
945 }
946
947 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
948 hdev, q->pq_size,
949 &q->pq_dma_addr,
950 GFP_KERNEL | __GFP_ZERO);
951 if (!q->pq_kernel_addr) {
952 rc = -ENOMEM;
953 goto free_internal_qmans_pq_mem;
954 }
955 }
956
957 return 0;
958
959free_internal_qmans_pq_mem:
960 gaudi_free_internal_qmans_pq_mem(hdev);
961 return rc;
962}
963
964static int gaudi_sw_init(struct hl_device *hdev)
965{
966 struct gaudi_device *gaudi;
Ofir Bittonebd8d122020-05-10 13:41:28 +0300967 u32 i, event_id = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300968 int rc;
969
970 /* Allocate device structure */
971 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
972 if (!gaudi)
973 return -ENOMEM;
974
Ofir Bittonebd8d122020-05-10 13:41:28 +0300975 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
976 if (gaudi_irq_map_table[i].valid) {
977 if (event_id == GAUDI_EVENT_SIZE) {
978 dev_err(hdev->dev,
979 "Event array exceeds the limit of %u events\n",
980 GAUDI_EVENT_SIZE);
981 rc = -EINVAL;
982 goto free_gaudi_device;
983 }
984
985 gaudi->events[event_id++] =
986 gaudi_irq_map_table[i].fc_id;
987 }
988 }
989
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300990 gaudi->armcp_info_get = gaudi_armcp_info_get;
991
992 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
993
994 hdev->asic_specific = gaudi;
995
996 /* Create DMA pool for small allocations */
997 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
998 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
999 if (!hdev->dma_pool) {
1000 dev_err(hdev->dev, "failed to create DMA pool\n");
1001 rc = -ENOMEM;
1002 goto free_gaudi_device;
1003 }
1004
1005 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1006 if (rc)
1007 goto free_dma_pool;
1008
1009 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1010 if (!hdev->cpu_accessible_dma_pool) {
1011 dev_err(hdev->dev,
1012 "Failed to create CPU accessible DMA pool\n");
1013 rc = -ENOMEM;
1014 goto free_cpu_dma_mem;
1015 }
1016
1017 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1018 (uintptr_t) hdev->cpu_accessible_dma_mem,
1019 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1020 if (rc) {
1021 dev_err(hdev->dev,
1022 "Failed to add memory to CPU accessible DMA pool\n");
1023 rc = -EFAULT;
1024 goto free_cpu_accessible_dma_pool;
1025 }
1026
1027 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1028 if (rc)
1029 goto free_cpu_accessible_dma_pool;
1030
1031 spin_lock_init(&gaudi->hw_queues_lock);
1032 mutex_init(&gaudi->clk_gate_mutex);
1033
1034 hdev->supports_sync_stream = true;
1035 hdev->supports_coresight = true;
1036
1037 return 0;
1038
1039free_cpu_accessible_dma_pool:
1040 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1041free_cpu_dma_mem:
1042 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1043 hdev->cpu_pci_msb_addr);
1044 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1045 HL_CPU_ACCESSIBLE_MEM_SIZE,
1046 hdev->cpu_accessible_dma_mem,
1047 hdev->cpu_accessible_dma_address);
1048free_dma_pool:
1049 dma_pool_destroy(hdev->dma_pool);
1050free_gaudi_device:
1051 kfree(gaudi);
1052 return rc;
1053}
1054
1055static int gaudi_sw_fini(struct hl_device *hdev)
1056{
1057 struct gaudi_device *gaudi = hdev->asic_specific;
1058
1059 gaudi_free_internal_qmans_pq_mem(hdev);
1060
1061 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1062
1063 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1064 hdev->cpu_pci_msb_addr);
1065 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1066 HL_CPU_ACCESSIBLE_MEM_SIZE,
1067 hdev->cpu_accessible_dma_mem,
1068 hdev->cpu_accessible_dma_address);
1069
1070 dma_pool_destroy(hdev->dma_pool);
1071
1072 mutex_destroy(&gaudi->clk_gate_mutex);
1073
1074 kfree(gaudi);
1075
1076 return 0;
1077}
1078
1079static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1080{
1081 struct hl_device *hdev = arg;
1082 int i;
1083
1084 if (hdev->disabled)
1085 return IRQ_HANDLED;
1086
1087 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1088 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1089
1090 hl_irq_handler_eq(irq, &hdev->event_queue);
1091
1092 return IRQ_HANDLED;
1093}
1094
1095/*
1096 * For backward compatibility, new MSI interrupts should be set after the
1097 * existing CPU and NIC interrupts.
1098 */
1099static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1100 bool cpu_eq)
1101{
1102 int msi_vec;
1103
1104 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1105 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1106 GAUDI_EVENT_QUEUE_MSI_IDX);
1107
1108 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1109 (nr + NIC_NUMBER_OF_ENGINES + 1);
1110
1111 return pci_irq_vector(hdev->pdev, msi_vec);
1112}
1113
1114static int gaudi_enable_msi_single(struct hl_device *hdev)
1115{
1116 int rc, irq;
1117
1118 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1119
1120 irq = gaudi_pci_irq_vector(hdev, 0, false);
1121 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1122 "gaudi single msi", hdev);
1123 if (rc)
1124 dev_err(hdev->dev,
1125 "Failed to request single MSI IRQ\n");
1126
1127 return rc;
1128}
1129
1130static int gaudi_enable_msi_multi(struct hl_device *hdev)
1131{
1132 int cq_cnt = hdev->asic_prop.completion_queues_count;
1133 int rc, i, irq_cnt_init, irq;
1134
1135 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1136 irq = gaudi_pci_irq_vector(hdev, i, false);
1137 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1138 &hdev->completion_queue[i]);
1139 if (rc) {
1140 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1141 goto free_irqs;
1142 }
1143 }
1144
1145 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1146 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1147 &hdev->event_queue);
1148 if (rc) {
1149 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1150 goto free_irqs;
1151 }
1152
1153 return 0;
1154
1155free_irqs:
1156 for (i = 0 ; i < irq_cnt_init ; i++)
1157 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1158 &hdev->completion_queue[i]);
1159 return rc;
1160}
1161
1162static int gaudi_enable_msi(struct hl_device *hdev)
1163{
1164 struct gaudi_device *gaudi = hdev->asic_specific;
1165 int rc;
1166
1167 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1168 return 0;
1169
1170 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1171 PCI_IRQ_MSI);
1172 if (rc < 0) {
1173 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1174 return rc;
1175 }
1176
1177 if (rc < NUMBER_OF_INTERRUPTS) {
1178 gaudi->multi_msi_mode = false;
1179 rc = gaudi_enable_msi_single(hdev);
1180 } else {
1181 gaudi->multi_msi_mode = true;
1182 rc = gaudi_enable_msi_multi(hdev);
1183 }
1184
1185 if (rc)
1186 goto free_pci_irq_vectors;
1187
1188 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1189
1190 return 0;
1191
1192free_pci_irq_vectors:
1193 pci_free_irq_vectors(hdev->pdev);
1194 return rc;
1195}
1196
1197static void gaudi_sync_irqs(struct hl_device *hdev)
1198{
1199 struct gaudi_device *gaudi = hdev->asic_specific;
1200 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1201
1202 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1203 return;
1204
1205 /* Wait for all pending IRQs to be finished */
1206 if (gaudi->multi_msi_mode) {
1207 for (i = 0 ; i < cq_cnt ; i++)
1208 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1209
1210 synchronize_irq(gaudi_pci_irq_vector(hdev,
1211 GAUDI_EVENT_QUEUE_MSI_IDX,
1212 true));
1213 } else {
1214 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1215 }
1216}
1217
1218static void gaudi_disable_msi(struct hl_device *hdev)
1219{
1220 struct gaudi_device *gaudi = hdev->asic_specific;
1221 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1222
1223 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1224 return;
1225
1226 gaudi_sync_irqs(hdev);
1227
1228 if (gaudi->multi_msi_mode) {
1229 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1230 true);
1231 free_irq(irq, &hdev->event_queue);
1232
1233 for (i = 0 ; i < cq_cnt ; i++) {
1234 irq = gaudi_pci_irq_vector(hdev, i, false);
1235 free_irq(irq, &hdev->completion_queue[i]);
1236 }
1237 } else {
1238 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1239 }
1240
1241 pci_free_irq_vectors(hdev->pdev);
1242
1243 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1244}
1245
1246static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1247{
1248 struct gaudi_device *gaudi = hdev->asic_specific;
1249
1250 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1251 return;
1252
1253 if (!hdev->sram_scrambler_enable)
1254 return;
1255
1256 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1257 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1258 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1259 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1260 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1261 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1262 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1263 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1264 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1265 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1266 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1267 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1268 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1269 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1270 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1271 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1272
1273 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1274 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1275 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1276 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1277 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1278 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1279 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1280 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1281 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1282 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1283 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1284 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1285 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1286 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1287 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1288 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1289
1290 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1291 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1292 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1293 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1294 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1295 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1296 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1297 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1298 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1299 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1300 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1301 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1302 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1303 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1304 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1305 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1306
1307 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1308}
1309
1310static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1311{
1312 struct gaudi_device *gaudi = hdev->asic_specific;
1313
1314 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1315 return;
1316
1317 if (!hdev->dram_scrambler_enable)
1318 return;
1319
1320 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1321 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1322 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1323 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1324 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1325 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1326 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1327 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1328 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1329 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1330 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1331 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1332 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1333 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1334 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1335 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1336
1337 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1338 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1339 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1340 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1341 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1342 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1343 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1344 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1345 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1346 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1347 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1348 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1349 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1350 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1351 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1352 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1353
1354 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1355 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1356 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1357 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1358 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1359 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1360 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1361 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1362 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1363 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1364 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1365 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1366 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1367 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1368 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1369 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1370
1371 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1372}
1373
1374static void gaudi_init_e2e(struct hl_device *hdev)
1375{
1376 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1377 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1378 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1379 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1380
1381 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1382 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1383 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1384 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1385
1386 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1387 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1388 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1389 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1390
1391 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1392 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1393 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1394 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1395
1396 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1397 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1398 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1399 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1400
1401 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1402 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1403 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1404 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1405
1406 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1407 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1408 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1409 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1410
1411 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1412 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1413 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1414 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1415
1416 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1417 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1418 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1419 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1420
1421 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1422 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1423 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1424 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1425
1426 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1427 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1428 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1429 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1430
1431 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1432 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1433 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1434 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1435
1436 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1437 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1438 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1439 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1440
1441 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1442 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1443 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1444 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1445
1446 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1447 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1448 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1449 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1450
1451 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1452 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1453 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1454 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1455
1456 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1457 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1458 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1459 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1460
1461 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1462 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1463 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1464 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1465
1466 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1467 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1468 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1469 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1470
1471 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1472 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1473 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1474 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1475
1476 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1477 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1478 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1479 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1480
1481 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1482 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1483 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1484 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1485
1486 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1487 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1488 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1489 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1490
1491 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1492 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1493 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1494 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1495
1496 if (!hdev->dram_scrambler_enable) {
1497 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1498 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1499 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1500 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1501
1502 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1503 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1504 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1505 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1506
1507 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1508 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1509 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1510 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1511
1512 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1513 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1514 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1515 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1516
1517 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1518 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1519 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1520 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1521
1522 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1523 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1524 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1525 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1526
1527 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1528 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1529 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1530 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1531
1532 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1533 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1534 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1535 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1536
1537 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1538 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1539 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1540 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1541
1542 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1543 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1544 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1545 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1546
1547 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1548 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1549 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1550 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1551
1552 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1553 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1554 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1555 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1556
1557 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1558 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1559 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1560 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1561
1562 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1563 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1564 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1565 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1566
1567 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1568 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1569 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1570 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1571
1572 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1573 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1574 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1575 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1576
1577 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1578 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1579 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1580 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1581
1582 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1583 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1584 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1585 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1586
1587 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1588 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1589 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1590 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1591
1592 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1593 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1594 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1595 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1596
1597 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1598 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1599 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1600 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1601
1602 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1603 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1604 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1605 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1606
1607 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1608 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1609 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1610 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1611
1612 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1613 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1614 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1615 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1616 }
1617
1618 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1619 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1620 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1621 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1622
1623 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1624 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1625 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1626 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1627
1628 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1629 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1630 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1631 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1632
1633 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1634 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1635 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1636 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1637
1638 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1639 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1640 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1641 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1642
1643 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1644 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1645 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1646 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1647
1648 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1649 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1650 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1651 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1652
1653 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1654 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1655 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1656 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1657
1658 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1659 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1660 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1661 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1662
1663 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1664 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1665 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1666 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1667
1668 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1669 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1670 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1671 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1672
1673 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1674 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1675 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1676 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1677
1678 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1679 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1680 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1681 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1682
1683 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1684 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1685 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1686 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1687
1688 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1689 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1690 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1691 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1692
1693 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1694 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1695 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1696 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1697
1698 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1699 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1700 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1701 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1702
1703 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1704 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1705 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1706 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1707
1708 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1709 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1710 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1711 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1712
1713 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1714 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1715 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1716 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1717
1718 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1719 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1720 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1721 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1722
1723 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1724 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1725 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1726 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1727
1728 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1729 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1730 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1731 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1732
1733 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1734 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1735 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1736 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1737}
1738
1739static void gaudi_init_hbm_cred(struct hl_device *hdev)
1740{
1741 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1742
1743 hbm0_wr = 0x33333333;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001744 hbm0_rd = 0x77777777;
Oded Gabbay0b168c82020-06-15 19:25:57 +03001745 hbm1_wr = 0x55555555;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001746 hbm1_rd = 0xDDDDDDDD;
1747
1748 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1749 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1750 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1751 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1752
1753 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1754 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1755 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1756 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1757
1758 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1759 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1760 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1761 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1762
1763 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1764 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1765 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1766 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1767
1768 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1769 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1770 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1771 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1772 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1773 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1774 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1775 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1776 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1777 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1778 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1779 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1780
1781 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1782 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1783 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1784 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1785 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1786 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1787 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1788 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1789 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1790 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1791 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1792 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1793}
1794
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001795static void gaudi_init_golden_registers(struct hl_device *hdev)
1796{
1797 u32 tpc_offset;
1798 int tpc_id, i;
1799
1800 gaudi_init_e2e(hdev);
1801
1802 gaudi_init_hbm_cred(hdev);
1803
Oded Gabbaye38bfd32020-07-03 20:46:12 +03001804 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001805
1806 for (tpc_id = 0, tpc_offset = 0;
1807 tpc_id < TPC_NUMBER_OF_ENGINES;
1808 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1809 /* Mask all arithmetic interrupts from TPC */
1810 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1811 /* Set 16 cache lines */
1812 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1813 ICACHE_FETCH_LINE_NUM, 2);
1814 }
1815
1816 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1817 for (i = 0 ; i < 128 ; i += 8)
1818 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1819
1820 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1821 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1822 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1823 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001824}
1825
1826static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1827 int qman_id, dma_addr_t qman_pq_addr)
1828{
1829 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1830 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1831 u32 q_off, dma_qm_offset;
1832 u32 dma_qm_err_cfg;
1833
1834 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1835
1836 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1837 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1838 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1839 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1840 so_base_en_lo = lower_32_bits(CFG_BASE +
1841 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1842 so_base_en_hi = upper_32_bits(CFG_BASE +
1843 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1844 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1845 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1846 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1847 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1848 so_base_ws_lo = lower_32_bits(CFG_BASE +
1849 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1850 so_base_ws_hi = upper_32_bits(CFG_BASE +
1851 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1852
1853 q_off = dma_qm_offset + qman_id * 4;
1854
1855 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1856 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1857
1858 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1859 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1860 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1861
1862 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1863 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1864 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1865
1866 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1867 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1868 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1869 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1870 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1871 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1872 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1873 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1874
Omer Shpigelmance043262020-06-16 17:56:27 +03001875 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1876
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001877 /* The following configuration is needed only once per QMAN */
1878 if (qman_id == 0) {
1879 /* Configure RAZWI IRQ */
1880 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1881 if (hdev->stop_on_err) {
1882 dma_qm_err_cfg |=
1883 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1884 }
1885
1886 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1887 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1888 lower_32_bits(CFG_BASE +
1889 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1890 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1891 upper_32_bits(CFG_BASE +
1892 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1893 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1894 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1895 dma_id);
1896
1897 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1898 QM_ARB_ERR_MSG_EN_MASK);
1899
1900 /* Increase ARB WDT to support streams architecture */
1901 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1902 GAUDI_ARB_WDT_TIMEOUT);
1903
1904 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1905 QMAN_EXTERNAL_MAKE_TRUSTED);
1906
1907 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1908 }
1909}
1910
1911static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1912{
1913 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1914 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1915
1916 /* Set to maximum possible according to physical size */
1917 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1918 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1919
1920 /* STOP_ON bit implies no completion to operation in case of RAZWI */
1921 if (hdev->stop_on_err)
1922 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1923
1924 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1925 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1926 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1927 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1928 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1929 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1930 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1931 WREG32(mmDMA0_CORE_PROT + dma_offset,
1932 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1933 /* If the channel is secured, it should be in MMU bypass mode */
1934 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1935 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1936 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1937}
1938
1939static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1940 u32 enable_mask)
1941{
1942 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1943
1944 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1945}
1946
1947static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1948{
1949 struct gaudi_device *gaudi = hdev->asic_specific;
1950 struct hl_hw_queue *q;
1951 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1952
1953 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1954 return;
1955
1956 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1957 dma_id = gaudi_dma_assignment[i];
1958 /*
1959 * For queues after the CPU Q need to add 1 to get the correct
1960 * queue. In addition, need to add the CPU EQ and NIC IRQs in
1961 * order to get the correct MSI register.
1962 */
1963 if (dma_id > 1) {
1964 cpu_skip = 1;
1965 nic_skip = NIC_NUMBER_OF_ENGINES;
1966 } else {
1967 cpu_skip = 0;
1968 nic_skip = 0;
1969 }
1970
1971 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1972 q_idx = 4 * dma_id + j + cpu_skip;
1973 q = &hdev->kernel_queues[q_idx];
1974 q->cq_id = cq_id++;
1975 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1976 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1977 q->bus_address);
1978 }
1979
1980 gaudi_init_dma_core(hdev, dma_id);
1981
1982 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1983 }
1984
1985 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1986}
1987
1988static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1989 int qman_id, u64 qman_base_addr)
1990{
1991 u32 mtr_base_lo, mtr_base_hi;
1992 u32 so_base_lo, so_base_hi;
1993 u32 q_off, dma_qm_offset;
1994 u32 dma_qm_err_cfg;
1995
1996 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1997
1998 mtr_base_lo = lower_32_bits(CFG_BASE +
1999 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2000 mtr_base_hi = upper_32_bits(CFG_BASE +
2001 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2002 so_base_lo = lower_32_bits(CFG_BASE +
2003 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2004 so_base_hi = upper_32_bits(CFG_BASE +
2005 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2006
2007 q_off = dma_qm_offset + qman_id * 4;
2008
2009 if (qman_id < 4) {
2010 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2011 lower_32_bits(qman_base_addr));
2012 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2013 upper_32_bits(qman_base_addr));
2014
2015 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2016 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2017 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2018
2019 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2020 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2021 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2022 } else {
2023 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2024 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2025 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2026
2027 /* Configure RAZWI IRQ */
2028 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2029 if (hdev->stop_on_err) {
2030 dma_qm_err_cfg |=
2031 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2032 }
2033 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2034
2035 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2036 lower_32_bits(CFG_BASE +
2037 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2038 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2039 upper_32_bits(CFG_BASE +
2040 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2041 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2042 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2043 dma_id);
2044
2045 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2046 QM_ARB_ERR_MSG_EN_MASK);
2047
2048 /* Increase ARB WDT to support streams architecture */
2049 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2050 GAUDI_ARB_WDT_TIMEOUT);
2051
2052 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2053 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2054 QMAN_INTERNAL_MAKE_TRUSTED);
2055 }
2056
2057 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2058 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2059 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2060 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2061}
2062
2063static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2064{
2065 struct gaudi_device *gaudi = hdev->asic_specific;
2066 struct gaudi_internal_qman_info *q;
2067 u64 qman_base_addr;
2068 int i, j, dma_id, internal_q_index;
2069
2070 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2071 return;
2072
2073 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2074 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2075
2076 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2077 /*
2078 * Add the CPU queue in order to get the correct queue
2079 * number as all internal queue are placed after it
2080 */
2081 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2082
2083 q = &gaudi->internal_qmans[internal_q_index];
2084 qman_base_addr = (u64) q->pq_dma_addr;
2085 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2086 qman_base_addr);
2087 }
2088
2089 /* Initializing lower CP for HBM DMA QMAN */
2090 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2091
2092 gaudi_init_dma_core(hdev, dma_id);
2093
2094 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2095 }
2096
2097 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2098}
2099
2100static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2101 int qman_id, u64 qman_base_addr)
2102{
2103 u32 mtr_base_lo, mtr_base_hi;
2104 u32 so_base_lo, so_base_hi;
2105 u32 q_off, mme_id;
2106 u32 mme_qm_err_cfg;
2107
2108 mtr_base_lo = lower_32_bits(CFG_BASE +
2109 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2110 mtr_base_hi = upper_32_bits(CFG_BASE +
2111 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2112 so_base_lo = lower_32_bits(CFG_BASE +
2113 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2114 so_base_hi = upper_32_bits(CFG_BASE +
2115 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2116
2117 q_off = mme_offset + qman_id * 4;
2118
2119 if (qman_id < 4) {
2120 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2121 lower_32_bits(qman_base_addr));
2122 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2123 upper_32_bits(qman_base_addr));
2124
2125 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2126 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2127 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2128
2129 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2130 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2131 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2132 } else {
2133 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2134 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2135 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2136
2137 /* Configure RAZWI IRQ */
2138 mme_id = mme_offset /
2139 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2140
2141 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2142 if (hdev->stop_on_err) {
2143 mme_qm_err_cfg |=
2144 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2145 }
2146 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2147 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2148 lower_32_bits(CFG_BASE +
2149 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2150 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2151 upper_32_bits(CFG_BASE +
2152 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2153 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2154 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2155 mme_id);
2156
2157 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2158 QM_ARB_ERR_MSG_EN_MASK);
2159
2160 /* Increase ARB WDT to support streams architecture */
2161 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2162 GAUDI_ARB_WDT_TIMEOUT);
2163
2164 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2165 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2166 QMAN_INTERNAL_MAKE_TRUSTED);
2167 }
2168
2169 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2170 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2171 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2172 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2173}
2174
2175static void gaudi_init_mme_qmans(struct hl_device *hdev)
2176{
2177 struct gaudi_device *gaudi = hdev->asic_specific;
2178 struct gaudi_internal_qman_info *q;
2179 u64 qman_base_addr;
2180 u32 mme_offset;
2181 int i, internal_q_index;
2182
2183 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2184 return;
2185
2186 /*
2187 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2188 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2189 */
2190
2191 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2192
2193 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2194 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2195 q = &gaudi->internal_qmans[internal_q_index];
2196 qman_base_addr = (u64) q->pq_dma_addr;
2197 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2198 qman_base_addr);
2199 if (i == 3)
2200 mme_offset = 0;
2201 }
2202
2203 /* Initializing lower CP for MME QMANs */
2204 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2205 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2206 gaudi_init_mme_qman(hdev, 0, 4, 0);
2207
2208 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2209 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2210
2211 gaudi->hw_cap_initialized |= HW_CAP_MME;
2212}
2213
2214static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2215 int qman_id, u64 qman_base_addr)
2216{
2217 u32 mtr_base_lo, mtr_base_hi;
2218 u32 so_base_lo, so_base_hi;
2219 u32 q_off, tpc_id;
2220 u32 tpc_qm_err_cfg;
2221
2222 mtr_base_lo = lower_32_bits(CFG_BASE +
2223 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2224 mtr_base_hi = upper_32_bits(CFG_BASE +
2225 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2226 so_base_lo = lower_32_bits(CFG_BASE +
2227 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2228 so_base_hi = upper_32_bits(CFG_BASE +
2229 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2230
2231 q_off = tpc_offset + qman_id * 4;
2232
2233 if (qman_id < 4) {
2234 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2235 lower_32_bits(qman_base_addr));
2236 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2237 upper_32_bits(qman_base_addr));
2238
2239 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2240 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2241 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2242
2243 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2244 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2245 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2246 } else {
2247 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2248 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2249 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2250
2251 /* Configure RAZWI IRQ */
2252 tpc_id = tpc_offset /
2253 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2254
2255 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2256 if (hdev->stop_on_err) {
2257 tpc_qm_err_cfg |=
2258 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2259 }
2260
2261 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2262 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2263 lower_32_bits(CFG_BASE +
2264 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2265 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2266 upper_32_bits(CFG_BASE +
2267 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2268 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2269 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2270 tpc_id);
2271
2272 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2273 QM_ARB_ERR_MSG_EN_MASK);
2274
2275 /* Increase ARB WDT to support streams architecture */
2276 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2277 GAUDI_ARB_WDT_TIMEOUT);
2278
2279 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2280 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2281 QMAN_INTERNAL_MAKE_TRUSTED);
2282 }
2283
2284 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2285 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2286 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2287 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2288}
2289
2290static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2291{
2292 struct gaudi_device *gaudi = hdev->asic_specific;
2293 struct gaudi_internal_qman_info *q;
2294 u64 qman_base_addr;
2295 u32 so_base_hi, tpc_offset = 0;
2296 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2297 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2298 int i, tpc_id, internal_q_index;
2299
2300 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2301 return;
2302
2303 so_base_hi = upper_32_bits(CFG_BASE +
2304 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2305
2306 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2307 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2308 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2309 tpc_id * QMAN_STREAMS + i;
2310 q = &gaudi->internal_qmans[internal_q_index];
2311 qman_base_addr = (u64) q->pq_dma_addr;
2312 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2313 qman_base_addr);
2314
2315 if (i == 3) {
2316 /* Initializing lower CP for TPC QMAN */
2317 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2318
2319 /* Enable the QMAN and TPC channel */
2320 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2321 QMAN_TPC_ENABLE);
2322 }
2323 }
2324
2325 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2326 so_base_hi);
2327
2328 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2329
Oded Gabbay65887292020-08-12 11:21:01 +03002330 gaudi->hw_cap_initialized |=
2331 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002332 }
2333}
2334
2335static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2336{
2337 struct gaudi_device *gaudi = hdev->asic_specific;
2338
2339 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2340 return;
2341
2342 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2343 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2344 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2345}
2346
2347static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2348{
2349 struct gaudi_device *gaudi = hdev->asic_specific;
2350
2351 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2352 return;
2353
2354 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2355 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2356 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2357 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2358 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2359}
2360
2361static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2362{
2363 struct gaudi_device *gaudi = hdev->asic_specific;
2364
2365 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2366 return;
2367
2368 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2369 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2370}
2371
2372static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2373{
2374 struct gaudi_device *gaudi = hdev->asic_specific;
2375 u32 tpc_offset = 0;
2376 int tpc_id;
2377
2378 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2379 return;
2380
2381 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2382 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2383 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2384 }
2385}
2386
2387static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2388{
2389 struct gaudi_device *gaudi = hdev->asic_specific;
2390
2391 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2392 return;
2393
2394 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2395 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2396 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2397 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2398}
2399
2400static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2401{
2402 struct gaudi_device *gaudi = hdev->asic_specific;
2403
2404 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2405 return;
2406
2407 /* Stop CPs of HBM DMA QMANs */
2408
2409 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2410 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2411 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2412 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2413 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2414}
2415
2416static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2417{
2418 struct gaudi_device *gaudi = hdev->asic_specific;
2419
2420 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2421 return;
2422
2423 /* Stop CPs of MME QMANs */
2424 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2425 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2426}
2427
2428static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2429{
2430 struct gaudi_device *gaudi = hdev->asic_specific;
2431
2432 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2433 return;
2434
2435 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2436 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2437 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2438 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2439 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2440 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2441 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2442 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2443}
2444
2445static void gaudi_pci_dma_stall(struct hl_device *hdev)
2446{
2447 struct gaudi_device *gaudi = hdev->asic_specific;
2448
2449 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2450 return;
2451
2452 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2453 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2454 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2455}
2456
2457static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2458{
2459 struct gaudi_device *gaudi = hdev->asic_specific;
2460
2461 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2462 return;
2463
2464 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2465 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2466 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2467 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2468 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2469}
2470
2471static void gaudi_mme_stall(struct hl_device *hdev)
2472{
2473 struct gaudi_device *gaudi = hdev->asic_specific;
2474
2475 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2476 return;
2477
2478 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2479 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2480 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2481 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2482 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2483 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2484 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2485 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2486 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2487 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2488 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2489 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2490 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2491 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2492 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2493 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2494 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2495}
2496
2497static void gaudi_tpc_stall(struct hl_device *hdev)
2498{
2499 struct gaudi_device *gaudi = hdev->asic_specific;
2500
2501 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2502 return;
2503
2504 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2505 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2506 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2507 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2508 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2509 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2510 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2511 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2512}
2513
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002514static void gaudi_set_clock_gating(struct hl_device *hdev)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002515{
2516 struct gaudi_device *gaudi = hdev->asic_specific;
2517 u32 qman_offset;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002518 bool enable;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002519 int i;
2520
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002521 /* In case we are during debug session, don't enable the clock gate
2522 * as it may interfere
2523 */
2524 if (hdev->in_debug)
2525 return;
2526
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002527 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002528 enable = !!(hdev->clock_gating_mask &
2529 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002530
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002531 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002532 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2533 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002534 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002535 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002536 }
2537
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002538 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002539 enable = !!(hdev->clock_gating_mask &
2540 (BIT_ULL(gaudi_dma_assignment[i])));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002541
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002542 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002543 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2544 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002545 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002546 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002547 }
2548
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002549 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2550 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2551 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002552
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002553 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2554 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2555 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002556
2557 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002558 enable = !!(hdev->clock_gating_mask &
2559 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002560
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002561 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002562 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002563 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
Ofir Bittonf44d23b2020-08-04 13:38:43 +03002564 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002565
2566 qman_offset += TPC_QMAN_OFFSET;
2567 }
2568
2569 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2570}
2571
2572static void gaudi_disable_clock_gating(struct hl_device *hdev)
2573{
2574 struct gaudi_device *gaudi = hdev->asic_specific;
2575 u32 qman_offset;
2576 int i;
2577
2578 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2579 return;
2580
2581 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2582 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2583 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2584
2585 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2586 }
2587
2588 WREG32(mmMME0_QM_CGM_CFG, 0);
2589 WREG32(mmMME0_QM_CGM_CFG1, 0);
2590 WREG32(mmMME2_QM_CGM_CFG, 0);
2591 WREG32(mmMME2_QM_CGM_CFG1, 0);
2592
2593 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2594 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2595 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2596
2597 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2598 }
2599
2600 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2601}
2602
2603static void gaudi_enable_timestamp(struct hl_device *hdev)
2604{
2605 /* Disable the timestamp counter */
2606 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2607
2608 /* Zero the lower/upper parts of the 64-bit counter */
2609 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2610 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2611
2612 /* Enable the counter */
2613 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2614}
2615
2616static void gaudi_disable_timestamp(struct hl_device *hdev)
2617{
2618 /* Disable the timestamp counter */
2619 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2620}
2621
2622static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2623{
Oded Gabbayc83c4172020-07-05 15:48:34 +03002624 u32 wait_timeout_ms;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002625
2626 dev_info(hdev->dev,
2627 "Halting compute engines and disabling interrupts\n");
2628
Oded Gabbayc83c4172020-07-05 15:48:34 +03002629 if (hdev->pldm)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002630 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03002631 else
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002632 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002633
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002634
2635 gaudi_stop_mme_qmans(hdev);
2636 gaudi_stop_tpc_qmans(hdev);
2637 gaudi_stop_hbm_dma_qmans(hdev);
2638 gaudi_stop_pci_dma_qmans(hdev);
2639
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002640 hdev->asic_funcs->disable_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002641
2642 msleep(wait_timeout_ms);
2643
2644 gaudi_pci_dma_stall(hdev);
2645 gaudi_hbm_dma_stall(hdev);
2646 gaudi_tpc_stall(hdev);
2647 gaudi_mme_stall(hdev);
2648
2649 msleep(wait_timeout_ms);
2650
2651 gaudi_disable_mme_qmans(hdev);
2652 gaudi_disable_tpc_qmans(hdev);
2653 gaudi_disable_hbm_dma_qmans(hdev);
2654 gaudi_disable_pci_dma_qmans(hdev);
2655
2656 gaudi_disable_timestamp(hdev);
2657
Oded Gabbay12ae3132020-07-03 20:58:23 +03002658 gaudi_disable_msi(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002659}
2660
2661static int gaudi_mmu_init(struct hl_device *hdev)
2662{
2663 struct asic_fixed_properties *prop = &hdev->asic_prop;
2664 struct gaudi_device *gaudi = hdev->asic_specific;
2665 u64 hop0_addr;
2666 int rc, i;
2667
2668 if (!hdev->mmu_enable)
2669 return 0;
2670
2671 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2672 return 0;
2673
2674 hdev->dram_supports_virtual_memory = false;
2675
2676 for (i = 0 ; i < prop->max_asid ; i++) {
2677 hop0_addr = prop->mmu_pgt_addr +
2678 (i * prop->mmu_hop_table_size);
2679
2680 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2681 if (rc) {
2682 dev_err(hdev->dev,
2683 "failed to set hop0 addr for asid %d\n", i);
2684 goto err;
2685 }
2686 }
2687
2688 /* init MMU cache manage page */
2689 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2690 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2691
Tomer Tayar644883e2020-07-19 11:00:03 +03002692 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002693
2694 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2695 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2696
2697 WREG32(mmSTLB_HOP_CONFIGURATION,
2698 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2699
Omer Shpigelmancfd41762020-06-03 13:03:35 +03002700 /*
2701 * The H/W expects the first PI after init to be 1. After wraparound
2702 * we'll write 0.
2703 */
2704 gaudi->mmu_cache_inv_pi = 1;
2705
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002706 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2707
2708 return 0;
2709
2710err:
2711 return rc;
2712}
2713
2714static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2715{
2716 void __iomem *dst;
2717
2718 /* HBM scrambler must be initialized before pushing F/W to HBM */
2719 gaudi_init_scrambler_hbm(hdev);
2720
2721 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2722
2723 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2724}
2725
2726static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2727{
2728 void __iomem *dst;
2729
2730 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2731
2732 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2733}
2734
2735static void gaudi_read_device_fw_version(struct hl_device *hdev,
2736 enum hl_fw_component fwc)
2737{
2738 const char *name;
2739 u32 ver_off;
2740 char *dest;
2741
2742 switch (fwc) {
2743 case FW_COMP_UBOOT:
2744 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2745 dest = hdev->asic_prop.uboot_ver;
2746 name = "U-Boot";
2747 break;
2748 case FW_COMP_PREBOOT:
2749 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2750 dest = hdev->asic_prop.preboot_ver;
2751 name = "Preboot";
2752 break;
2753 default:
2754 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2755 return;
2756 }
2757
2758 ver_off &= ~((u32)SRAM_BASE_ADDR);
2759
2760 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2761 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2762 VERSION_MAX_LEN);
2763 } else {
2764 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2765 name, ver_off);
2766 strcpy(dest, "unavailable");
2767 }
2768}
2769
2770static int gaudi_init_cpu(struct hl_device *hdev)
2771{
2772 struct gaudi_device *gaudi = hdev->asic_specific;
2773 int rc;
2774
2775 if (!hdev->cpu_enable)
2776 return 0;
2777
2778 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2779 return 0;
2780
2781 /*
2782 * The device CPU works with 40 bits addresses.
2783 * This register sets the extension to 50 bits.
2784 */
2785 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2786
2787 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2788 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2789 mmCPU_CMD_STATUS_TO_HOST,
2790 mmCPU_BOOT_ERR0,
2791 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2792 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2793
2794 if (rc)
2795 return rc;
2796
2797 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2798
2799 return 0;
2800}
2801
2802static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2803{
2804 struct gaudi_device *gaudi = hdev->asic_specific;
2805 struct hl_eq *eq;
2806 u32 status;
2807 struct hl_hw_queue *cpu_pq =
2808 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2809 int err;
2810
2811 if (!hdev->cpu_queues_enable)
2812 return 0;
2813
2814 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2815 return 0;
2816
2817 eq = &hdev->event_queue;
2818
2819 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2820 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2821
2822 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2823 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2824
2825 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2826 lower_32_bits(hdev->cpu_accessible_dma_address));
2827 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2828 upper_32_bits(hdev->cpu_accessible_dma_address));
2829
2830 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2831 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2832 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2833
2834 /* Used for EQ CI */
2835 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2836
2837 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2838
2839 if (gaudi->multi_msi_mode)
2840 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2841 else
2842 WREG32(mmCPU_IF_QUEUE_INIT,
2843 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2844
2845 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2846
2847 err = hl_poll_timeout(
2848 hdev,
2849 mmCPU_IF_QUEUE_INIT,
2850 status,
2851 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2852 1000,
2853 cpu_timeout);
2854
2855 if (err) {
2856 dev_err(hdev->dev,
2857 "Failed to communicate with ARM CPU (ArmCP timeout)\n");
2858 return -EIO;
2859 }
2860
2861 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2862 return 0;
2863}
2864
2865static void gaudi_pre_hw_init(struct hl_device *hdev)
2866{
2867 /* Perform read from the device to make sure device is up */
2868 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2869
2870 /*
2871 * Let's mark in the H/W that we have reached this point. We check
2872 * this value in the reset_before_init function to understand whether
2873 * we need to reset the chip before doing H/W init. This register is
2874 * cleared by the H/W upon H/W reset
2875 */
2876 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2877
2878 /* Set the access through PCI bars (Linux driver only) as secured */
2879 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2880 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2881
2882 /* Perform read to flush the waiting writes to ensure configuration
2883 * was set in the device
2884 */
2885 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2886
2887 if (hdev->axi_drain) {
2888 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2889 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2890 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2891 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2892
2893 /* Perform read to flush the DRAIN cfg */
2894 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2895 } else {
2896 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2897 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2898
2899 /* Perform read to flush the DRAIN cfg */
2900 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2901 }
2902
2903 /* Configure the reset registers. Must be done as early as possible
2904 * in case we fail during H/W initialization
2905 */
2906 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2907 (CFG_RST_H_DMA_MASK |
2908 CFG_RST_H_MME_MASK |
2909 CFG_RST_H_SM_MASK |
Oded Gabbay65887292020-08-12 11:21:01 +03002910 CFG_RST_H_TPC_7_MASK));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002911
2912 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2913
2914 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2915 (CFG_RST_H_HBM_MASK |
Oded Gabbay65887292020-08-12 11:21:01 +03002916 CFG_RST_H_TPC_7_MASK |
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002917 CFG_RST_H_NIC_MASK |
2918 CFG_RST_H_SM_MASK |
2919 CFG_RST_H_DMA_MASK |
2920 CFG_RST_H_MME_MASK |
2921 CFG_RST_H_CPU_MASK |
2922 CFG_RST_H_MMU_MASK));
2923
2924 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2925 (CFG_RST_L_IF_MASK |
2926 CFG_RST_L_PSOC_MASK |
2927 CFG_RST_L_TPC_MASK));
2928}
2929
2930static int gaudi_hw_init(struct hl_device *hdev)
2931{
2932 int rc;
2933
2934 dev_info(hdev->dev, "Starting initialization of H/W\n");
2935
2936 gaudi_pre_hw_init(hdev);
2937
2938 gaudi_init_pci_dma_qmans(hdev);
2939
2940 gaudi_init_hbm_dma_qmans(hdev);
2941
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002942 rc = gaudi_init_cpu(hdev);
2943 if (rc) {
2944 dev_err(hdev->dev, "failed to initialize CPU\n");
2945 return rc;
2946 }
2947
2948 /* SRAM scrambler must be initialized after CPU is running from HBM */
2949 gaudi_init_scrambler_sram(hdev);
2950
2951 /* This is here just in case we are working without CPU */
2952 gaudi_init_scrambler_hbm(hdev);
2953
2954 gaudi_init_golden_registers(hdev);
2955
2956 rc = gaudi_mmu_init(hdev);
2957 if (rc)
2958 return rc;
2959
Omer Shpigelman3a3a5bf12020-05-11 10:45:12 +03002960 gaudi_init_security(hdev);
2961
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002962 gaudi_init_mme_qmans(hdev);
2963
2964 gaudi_init_tpc_qmans(hdev);
2965
Oded Gabbaye38bfd32020-07-03 20:46:12 +03002966 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03002967
2968 gaudi_enable_timestamp(hdev);
2969
2970 /* MSI must be enabled before CPU queues are initialized */
2971 rc = gaudi_enable_msi(hdev);
2972 if (rc)
2973 goto disable_queues;
2974
2975 /* must be called after MSI was enabled */
2976 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2977 if (rc) {
2978 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2979 rc);
2980 goto disable_msi;
2981 }
2982
2983 /* Perform read from the device to flush all configuration */
2984 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2985
2986 return 0;
2987
2988disable_msi:
2989 gaudi_disable_msi(hdev);
2990disable_queues:
2991 gaudi_disable_mme_qmans(hdev);
2992 gaudi_disable_pci_dma_qmans(hdev);
2993
2994 return rc;
2995}
2996
2997static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
2998{
2999 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003000 u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003001
Oded Gabbay12ae3132020-07-03 20:58:23 +03003002 if (!hard_reset) {
3003 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3004 return;
3005 }
3006
Oded Gabbayc83c4172020-07-05 15:48:34 +03003007 if (hdev->pldm) {
Oded Gabbay12ae3132020-07-03 20:58:23 +03003008 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003009 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3010 } else {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003011 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
Oded Gabbayc83c4172020-07-05 15:48:34 +03003012 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3013 }
3014
3015 /* Set device to handle FLR by H/W as we will put the device CPU to
3016 * halt mode
3017 */
3018 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3019 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3020
3021 /* I don't know what is the state of the CPU so make sure it is
3022 * stopped in any means necessary
3023 */
3024 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3025 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3026
3027 msleep(cpu_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003028
Oded Gabbay12ae3132020-07-03 20:58:23 +03003029 /* Tell ASIC not to re-initialize PCIe */
3030 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003031
Oded Gabbay12ae3132020-07-03 20:58:23 +03003032 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003033
Oded Gabbay12ae3132020-07-03 20:58:23 +03003034 /* H/W bug WA:
3035 * rdata[31:0] = strap_read_val;
3036 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3037 */
3038 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3039 (boot_strap & 0x001FFFFF));
3040 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003041
Oded Gabbay12ae3132020-07-03 20:58:23 +03003042 /* Restart BTL/BLR upon hard-reset */
3043 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003044
Oded Gabbay12ae3132020-07-03 20:58:23 +03003045 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3046 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3047 dev_info(hdev->dev,
3048 "Issued HARD reset command, going to wait %dms\n",
3049 reset_timeout_ms);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003050
3051 /*
3052 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3053 * itself is in reset. Need to wait until the reset is deasserted
3054 */
3055 msleep(reset_timeout_ms);
3056
3057 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3058 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3059 dev_err(hdev->dev,
3060 "Timeout while waiting for device to reset 0x%x\n",
3061 status);
3062
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003063 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3064
3065 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3066 HW_CAP_HBM | HW_CAP_PCI_DMA |
3067 HW_CAP_MME | HW_CAP_TPC_MASK |
3068 HW_CAP_HBM_DMA | HW_CAP_PLL |
3069 HW_CAP_MMU |
3070 HW_CAP_SRAM_SCRAMBLER |
Oded Gabbaye38bfd32020-07-03 20:46:12 +03003071 HW_CAP_HBM_SCRAMBLER |
3072 HW_CAP_CLK_GATE);
3073
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003074 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3075}
3076
3077static int gaudi_suspend(struct hl_device *hdev)
3078{
3079 int rc;
3080
3081 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
3082 if (rc)
3083 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3084
3085 return rc;
3086}
3087
/*
 * gaudi_resume - re-initialize the iATU after a suspend
 *
 * @hdev: pointer to the habanalabs device structure
 *
 * Return: the value returned by gaudi_init_iatu().
 */
static int gaudi_resume(struct hl_device *hdev)
{
	int rc = gaudi_init_iatu(hdev);

	return rc;
}
3092
3093static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3094 u64 kaddress, phys_addr_t paddress, u32 size)
3095{
3096 int rc;
3097
3098 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3099 VM_DONTCOPY | VM_NORESERVE;
3100
3101 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
3102 size, vma->vm_page_prot);
3103 if (rc)
3104 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
3105
3106 return rc;
3107}
3108
3109static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3110{
3111 struct gaudi_device *gaudi = hdev->asic_specific;
3112 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3113 int dma_id;
3114 bool invalid_queue = false;
3115
3116 switch (hw_queue_id) {
3117 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3118 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3119 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3120 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3121 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3122 break;
3123
3124 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3125 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3126 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3127 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3128 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3129 break;
3130
3131 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3132 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3133 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3134 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3135 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3136 break;
3137
3138 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3139 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3140 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3142 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143 break;
3144
3145 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3146 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3147 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3149 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150 break;
3151
3152 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3153 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3154 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157 break;
3158
3159 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3160 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3161 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164 break;
3165
3166 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3167 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3168 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171 break;
3172
3173 case GAUDI_QUEUE_ID_CPU_PQ:
3174 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3175 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3176 else
3177 invalid_queue = true;
3178 break;
3179
3180 case GAUDI_QUEUE_ID_MME_0_0:
3181 db_reg_offset = mmMME2_QM_PQ_PI_0;
3182 break;
3183
3184 case GAUDI_QUEUE_ID_MME_0_1:
3185 db_reg_offset = mmMME2_QM_PQ_PI_1;
3186 break;
3187
3188 case GAUDI_QUEUE_ID_MME_0_2:
3189 db_reg_offset = mmMME2_QM_PQ_PI_2;
3190 break;
3191
3192 case GAUDI_QUEUE_ID_MME_0_3:
3193 db_reg_offset = mmMME2_QM_PQ_PI_3;
3194 break;
3195
3196 case GAUDI_QUEUE_ID_MME_1_0:
3197 db_reg_offset = mmMME0_QM_PQ_PI_0;
3198 break;
3199
3200 case GAUDI_QUEUE_ID_MME_1_1:
3201 db_reg_offset = mmMME0_QM_PQ_PI_1;
3202 break;
3203
3204 case GAUDI_QUEUE_ID_MME_1_2:
3205 db_reg_offset = mmMME0_QM_PQ_PI_2;
3206 break;
3207
3208 case GAUDI_QUEUE_ID_MME_1_3:
3209 db_reg_offset = mmMME0_QM_PQ_PI_3;
3210 break;
3211
3212 case GAUDI_QUEUE_ID_TPC_0_0:
3213 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3214 break;
3215
3216 case GAUDI_QUEUE_ID_TPC_0_1:
3217 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3218 break;
3219
3220 case GAUDI_QUEUE_ID_TPC_0_2:
3221 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3222 break;
3223
3224 case GAUDI_QUEUE_ID_TPC_0_3:
3225 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3226 break;
3227
3228 case GAUDI_QUEUE_ID_TPC_1_0:
3229 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3230 break;
3231
3232 case GAUDI_QUEUE_ID_TPC_1_1:
3233 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3234 break;
3235
3236 case GAUDI_QUEUE_ID_TPC_1_2:
3237 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3238 break;
3239
3240 case GAUDI_QUEUE_ID_TPC_1_3:
3241 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3242 break;
3243
3244 case GAUDI_QUEUE_ID_TPC_2_0:
3245 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3246 break;
3247
3248 case GAUDI_QUEUE_ID_TPC_2_1:
3249 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3250 break;
3251
3252 case GAUDI_QUEUE_ID_TPC_2_2:
3253 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3254 break;
3255
3256 case GAUDI_QUEUE_ID_TPC_2_3:
3257 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3258 break;
3259
3260 case GAUDI_QUEUE_ID_TPC_3_0:
3261 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3262 break;
3263
3264 case GAUDI_QUEUE_ID_TPC_3_1:
3265 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3266 break;
3267
3268 case GAUDI_QUEUE_ID_TPC_3_2:
3269 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3270 break;
3271
3272 case GAUDI_QUEUE_ID_TPC_3_3:
3273 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3274 break;
3275
3276 case GAUDI_QUEUE_ID_TPC_4_0:
3277 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3278 break;
3279
3280 case GAUDI_QUEUE_ID_TPC_4_1:
3281 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3282 break;
3283
3284 case GAUDI_QUEUE_ID_TPC_4_2:
3285 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3286 break;
3287
3288 case GAUDI_QUEUE_ID_TPC_4_3:
3289 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3290 break;
3291
3292 case GAUDI_QUEUE_ID_TPC_5_0:
3293 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3294 break;
3295
3296 case GAUDI_QUEUE_ID_TPC_5_1:
3297 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3298 break;
3299
3300 case GAUDI_QUEUE_ID_TPC_5_2:
3301 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3302 break;
3303
3304 case GAUDI_QUEUE_ID_TPC_5_3:
3305 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3306 break;
3307
3308 case GAUDI_QUEUE_ID_TPC_6_0:
3309 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3310 break;
3311
3312 case GAUDI_QUEUE_ID_TPC_6_1:
3313 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3314 break;
3315
3316 case GAUDI_QUEUE_ID_TPC_6_2:
3317 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3318 break;
3319
3320 case GAUDI_QUEUE_ID_TPC_6_3:
3321 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3322 break;
3323
3324 case GAUDI_QUEUE_ID_TPC_7_0:
3325 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3326 break;
3327
3328 case GAUDI_QUEUE_ID_TPC_7_1:
3329 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3330 break;
3331
3332 case GAUDI_QUEUE_ID_TPC_7_2:
3333 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3334 break;
3335
3336 case GAUDI_QUEUE_ID_TPC_7_3:
3337 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3338 break;
3339
3340 default:
3341 invalid_queue = true;
3342 }
3343
3344 if (invalid_queue) {
3345 /* Should never get here */
3346 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3347 hw_queue_id);
3348 return;
3349 }
3350
3351 db_value = pi;
3352
3353 /* ring the doorbell */
3354 WREG32(db_reg_offset, db_value);
3355
3356 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3357 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3358 GAUDI_EVENT_PI_UPDATE);
3359}
3360
3361static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3362 struct hl_bd *bd)
3363{
3364 __le64 *pbd = (__le64 *) bd;
3365
3366 /* The QMANs are on the host memory so a simple copy suffice */
3367 pqe[0] = pbd[0];
3368 pqe[1] = pbd[1];
3369}
3370
3371static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3372 dma_addr_t *dma_handle, gfp_t flags)
3373{
3374 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3375 dma_handle, flags);
3376
3377 /* Shift to the device's base physical address of host memory */
3378 if (kernel_addr)
3379 *dma_handle += HOST_PHYS_BASE;
3380
3381 return kernel_addr;
3382}
3383
3384static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3385 void *cpu_addr, dma_addr_t dma_handle)
3386{
3387 /* Cancel the device's base physical address of host memory */
3388 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3389
3390 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3391}
3392
3393static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3394 u32 queue_id, dma_addr_t *dma_handle,
3395 u16 *queue_len)
3396{
3397 struct gaudi_device *gaudi = hdev->asic_specific;
3398 struct gaudi_internal_qman_info *q;
3399
3400 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3401 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3402 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3403 return NULL;
3404 }
3405
3406 q = &gaudi->internal_qmans[queue_id];
3407 *dma_handle = q->pq_dma_addr;
3408 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3409
3410 return q->pq_kernel_addr;
3411}
3412
3413static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3414 u16 len, u32 timeout, long *result)
3415{
3416 struct gaudi_device *gaudi = hdev->asic_specific;
3417
3418 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3419 if (result)
3420 *result = 0;
3421 return 0;
3422 }
3423
Oded Gabbay788cacf2020-07-07 17:30:13 +03003424 if (!timeout)
3425 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3426
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003427 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3428 timeout, result);
3429}
3430
3431static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3432{
3433 struct packet_msg_prot *fence_pkt;
3434 dma_addr_t pkt_dma_addr;
3435 u32 fence_val, tmp, timeout_usec;
3436 dma_addr_t fence_dma_addr;
3437 u32 *fence_ptr;
3438 int rc;
3439
3440 if (hdev->pldm)
3441 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3442 else
3443 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3444
3445 fence_val = GAUDI_QMAN0_FENCE_VAL;
3446
3447 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3448 &fence_dma_addr);
3449 if (!fence_ptr) {
3450 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03003451 "Failed to allocate memory for H/W queue %d testing\n",
3452 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003453 return -ENOMEM;
3454 }
3455
3456 *fence_ptr = 0;
3457
3458 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3459 sizeof(struct packet_msg_prot),
3460 GFP_KERNEL, &pkt_dma_addr);
3461 if (!fence_pkt) {
3462 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03003463 "Failed to allocate packet for H/W queue %d testing\n",
3464 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003465 rc = -ENOMEM;
3466 goto free_fence_ptr;
3467 }
3468
Oded Gabbay65887292020-08-12 11:21:01 +03003469 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3470 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3471 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3472
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003473 fence_pkt->ctl = cpu_to_le32(tmp);
3474 fence_pkt->value = cpu_to_le32(fence_val);
3475 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3476
3477 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3478 sizeof(struct packet_msg_prot),
3479 pkt_dma_addr);
3480 if (rc) {
3481 dev_err(hdev->dev,
Dotan Barakd6b045c2020-08-06 09:20:49 +03003482 "Failed to send fence packet to H/W queue %d\n",
3483 hw_queue_id);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003484 goto free_pkt;
3485 }
3486
3487 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3488 1000, timeout_usec, true);
3489
3490 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3491
3492 if (rc == -ETIMEDOUT) {
3493 dev_err(hdev->dev,
3494 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3495 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3496 rc = -EIO;
3497 }
3498
3499free_pkt:
3500 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3501 pkt_dma_addr);
3502free_fence_ptr:
3503 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3504 fence_dma_addr);
3505 return rc;
3506}
3507
3508static int gaudi_test_cpu_queue(struct hl_device *hdev)
3509{
3510 struct gaudi_device *gaudi = hdev->asic_specific;
3511
3512 /*
3513 * check capability here as send_cpu_message() won't update the result
3514 * value if no capability
3515 */
3516 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3517 return 0;
3518
3519 return hl_fw_test_cpu_queue(hdev);
3520}
3521
3522static int gaudi_test_queues(struct hl_device *hdev)
3523{
3524 int i, rc, ret_val = 0;
3525
Ofir Bitton3abc99b2020-06-23 14:50:39 +03003526 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003527 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3528 rc = gaudi_test_queue(hdev, i);
3529 if (rc)
3530 ret_val = -EINVAL;
3531 }
3532 }
3533
3534 rc = gaudi_test_cpu_queue(hdev);
3535 if (rc)
3536 ret_val = -EINVAL;
3537
3538 return ret_val;
3539}
3540
3541static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3542 gfp_t mem_flags, dma_addr_t *dma_handle)
3543{
3544 void *kernel_addr;
3545
3546 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3547 return NULL;
3548
3549 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3550
3551 /* Shift to the device's base physical address of host memory */
3552 if (kernel_addr)
3553 *dma_handle += HOST_PHYS_BASE;
3554
3555 return kernel_addr;
3556}
3557
3558static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3559 dma_addr_t dma_addr)
3560{
3561 /* Cancel the device's base physical address of host memory */
3562 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3563
3564 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3565}
3566
3567static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3568 size_t size, dma_addr_t *dma_handle)
3569{
3570 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3571}
3572
3573static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3574 size_t size, void *vaddr)
3575{
3576 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3577}
3578
3579static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3580 int nents, enum dma_data_direction dir)
3581{
3582 struct scatterlist *sg;
3583 int i;
3584
3585 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3586 return -ENOMEM;
3587
3588 /* Shift to the device's base physical address of host memory */
3589 for_each_sg(sgl, sg, nents, i)
3590 sg->dma_address += HOST_PHYS_BASE;
3591
3592 return 0;
3593}
3594
3595static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3596 int nents, enum dma_data_direction dir)
3597{
3598 struct scatterlist *sg;
3599 int i;
3600
3601 /* Cancel the device's base physical address of host memory */
3602 for_each_sg(sgl, sg, nents, i)
3603 sg->dma_address -= HOST_PHYS_BASE;
3604
3605 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3606}
3607
3608static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3609 struct sg_table *sgt)
3610{
3611 struct scatterlist *sg, *sg_next_iter;
3612 u32 count, dma_desc_cnt;
3613 u64 len, len_next;
3614 dma_addr_t addr, addr_next;
3615
3616 dma_desc_cnt = 0;
3617
3618 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3619
3620 len = sg_dma_len(sg);
3621 addr = sg_dma_address(sg);
3622
3623 if (len == 0)
3624 break;
3625
3626 while ((count + 1) < sgt->nents) {
3627 sg_next_iter = sg_next(sg);
3628 len_next = sg_dma_len(sg_next_iter);
3629 addr_next = sg_dma_address(sg_next_iter);
3630
3631 if (len_next == 0)
3632 break;
3633
3634 if ((addr + len == addr_next) &&
3635 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3636 len += len_next;
3637 count++;
3638 sg = sg_next_iter;
3639 } else {
3640 break;
3641 }
3642 }
3643
3644 dma_desc_cnt++;
3645 }
3646
3647 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3648}
3649
3650static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3651 struct hl_cs_parser *parser,
3652 struct packet_lin_dma *user_dma_pkt,
3653 u64 addr, enum dma_data_direction dir)
3654{
3655 struct hl_userptr *userptr;
3656 int rc;
3657
3658 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3659 parser->job_userptr_list, &userptr))
3660 goto already_pinned;
3661
3662 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3663 if (!userptr)
3664 return -ENOMEM;
3665
3666 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3667 userptr);
3668 if (rc)
3669 goto free_userptr;
3670
3671 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3672
3673 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3674 userptr->sgt->nents, dir);
3675 if (rc) {
3676 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3677 goto unpin_memory;
3678 }
3679
3680 userptr->dma_mapped = true;
3681 userptr->dir = dir;
3682
3683already_pinned:
3684 parser->patched_cb_size +=
3685 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3686
3687 return 0;
3688
3689unpin_memory:
3690 hl_unpin_host_memory(hdev, userptr);
3691free_userptr:
3692 kfree(userptr);
3693 return rc;
3694}
3695
3696static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3697 struct hl_cs_parser *parser,
3698 struct packet_lin_dma *user_dma_pkt,
3699 bool src_in_host)
3700{
3701 enum dma_data_direction dir;
3702 bool skip_host_mem_pin = false, user_memset;
3703 u64 addr;
3704 int rc = 0;
3705
3706 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3707 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3708 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3709
3710 if (src_in_host) {
3711 if (user_memset)
3712 skip_host_mem_pin = true;
3713
3714 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3715 dir = DMA_TO_DEVICE;
3716 addr = le64_to_cpu(user_dma_pkt->src_addr);
3717 } else {
3718 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3719 dir = DMA_FROM_DEVICE;
3720 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3721 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3722 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3723 }
3724
3725 if (skip_host_mem_pin)
3726 parser->patched_cb_size += sizeof(*user_dma_pkt);
3727 else
3728 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3729 addr, dir);
3730
3731 return rc;
3732}
3733
3734static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3735 struct hl_cs_parser *parser,
3736 struct packet_lin_dma *user_dma_pkt)
3737{
3738 bool src_in_host = false;
3739 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3740 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3741 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3742
3743 dev_dbg(hdev->dev, "DMA packet details:\n");
3744 dev_dbg(hdev->dev, "source == 0x%llx\n",
3745 le64_to_cpu(user_dma_pkt->src_addr));
3746 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3747 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3748
3749 /*
3750 * Special handling for DMA with size 0. Bypass all validations
3751 * because no transactions will be done except for WR_COMP, which
3752 * is not a security issue
3753 */
3754 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3755 parser->patched_cb_size += sizeof(*user_dma_pkt);
3756 return 0;
3757 }
3758
3759 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3760 src_in_host = true;
3761
3762 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3763 src_in_host);
3764}
3765
Oded Gabbay64536ab2020-05-27 12:38:16 +03003766static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3767 struct hl_cs_parser *parser,
3768 struct packet_load_and_exe *user_pkt)
3769{
3770 u32 cfg;
3771
3772 cfg = le32_to_cpu(user_pkt->cfg);
3773
3774 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3775 dev_err(hdev->dev,
3776 "User not allowed to use Load and Execute\n");
3777 return -EPERM;
3778 }
3779
3780 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3781
3782 return 0;
3783}
3784
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003785static int gaudi_validate_cb(struct hl_device *hdev,
3786 struct hl_cs_parser *parser, bool is_mmu)
3787{
3788 u32 cb_parsed_length = 0;
3789 int rc = 0;
3790
3791 parser->patched_cb_size = 0;
3792
3793 /* cb_user_size is more than 0 so loop will always be executed */
3794 while (cb_parsed_length < parser->user_cb_size) {
3795 enum packet_id pkt_id;
3796 u16 pkt_size;
3797 struct gaudi_packet *user_pkt;
3798
3799 user_pkt = (struct gaudi_packet *) (uintptr_t)
3800 (parser->user_cb->kernel_address + cb_parsed_length);
3801
3802 pkt_id = (enum packet_id) (
3803 (le64_to_cpu(user_pkt->header) &
3804 PACKET_HEADER_PACKET_ID_MASK) >>
3805 PACKET_HEADER_PACKET_ID_SHIFT);
3806
Ofir Bittonbc75be22020-07-30 14:56:38 +03003807 if (!validate_packet_id(pkt_id)) {
3808 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3809 rc = -EINVAL;
3810 break;
3811 }
3812
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003813 pkt_size = gaudi_packet_sizes[pkt_id];
3814 cb_parsed_length += pkt_size;
3815 if (cb_parsed_length > parser->user_cb_size) {
3816 dev_err(hdev->dev,
3817 "packet 0x%x is out of CB boundary\n", pkt_id);
3818 rc = -EINVAL;
3819 break;
3820 }
3821
3822 switch (pkt_id) {
3823 case PACKET_MSG_PROT:
3824 dev_err(hdev->dev,
3825 "User not allowed to use MSG_PROT\n");
3826 rc = -EPERM;
3827 break;
3828
3829 case PACKET_CP_DMA:
3830 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3831 rc = -EPERM;
3832 break;
3833
3834 case PACKET_STOP:
3835 dev_err(hdev->dev, "User not allowed to use STOP\n");
3836 rc = -EPERM;
3837 break;
3838
Oded Gabbay2edc66e2020-07-03 19:28:54 +03003839 case PACKET_WREG_BULK:
3840 dev_err(hdev->dev,
3841 "User not allowed to use WREG_BULK\n");
3842 rc = -EPERM;
3843 break;
3844
Oded Gabbay64536ab2020-05-27 12:38:16 +03003845 case PACKET_LOAD_AND_EXE:
3846 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3847 (struct packet_load_and_exe *) user_pkt);
3848 break;
3849
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003850 case PACKET_LIN_DMA:
3851 parser->contains_dma_pkt = true;
3852 if (is_mmu)
3853 parser->patched_cb_size += pkt_size;
3854 else
3855 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3856 (struct packet_lin_dma *) user_pkt);
3857 break;
3858
3859 case PACKET_WREG_32:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003860 case PACKET_MSG_LONG:
3861 case PACKET_MSG_SHORT:
3862 case PACKET_REPEAT:
3863 case PACKET_FENCE:
3864 case PACKET_NOP:
3865 case PACKET_ARB_POINT:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003866 parser->patched_cb_size += pkt_size;
3867 break;
3868
3869 default:
3870 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3871 pkt_id);
3872 rc = -EINVAL;
3873 break;
3874 }
3875
3876 if (rc)
3877 break;
3878 }
3879
3880 /*
3881 * The new CB should have space at the end for two MSG_PROT packets:
3882 * 1. A packet that will act as a completion packet
3883 * 2. A packet that will generate MSI-X interrupt
3884 */
3885 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3886
3887 return rc;
3888}
3889
3890static int gaudi_patch_dma_packet(struct hl_device *hdev,
3891 struct hl_cs_parser *parser,
3892 struct packet_lin_dma *user_dma_pkt,
3893 struct packet_lin_dma *new_dma_pkt,
3894 u32 *new_dma_pkt_size)
3895{
3896 struct hl_userptr *userptr;
3897 struct scatterlist *sg, *sg_next_iter;
3898 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3899 u64 len, len_next;
3900 dma_addr_t dma_addr, dma_addr_next;
3901 u64 device_memory_addr, addr;
3902 enum dma_data_direction dir;
3903 struct sg_table *sgt;
3904 bool src_in_host = false;
3905 bool skip_host_mem_pin = false;
3906 bool user_memset;
3907
3908 ctl = le32_to_cpu(user_dma_pkt->ctl);
3909
3910 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3911 src_in_host = true;
3912
3913 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3914 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3915
3916 if (src_in_host) {
3917 addr = le64_to_cpu(user_dma_pkt->src_addr);
3918 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3919 dir = DMA_TO_DEVICE;
3920 if (user_memset)
3921 skip_host_mem_pin = true;
3922 } else {
3923 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3924 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3925 dir = DMA_FROM_DEVICE;
3926 }
3927
3928 if ((!skip_host_mem_pin) &&
3929 (!hl_userptr_is_pinned(hdev, addr,
3930 le32_to_cpu(user_dma_pkt->tsize),
3931 parser->job_userptr_list, &userptr))) {
3932 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3933 addr, user_dma_pkt->tsize);
3934 return -EFAULT;
3935 }
3936
3937 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3938 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3939 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3940 return 0;
3941 }
3942
3943 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3944
3945 sgt = userptr->sgt;
3946 dma_desc_cnt = 0;
3947
3948 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3949 len = sg_dma_len(sg);
3950 dma_addr = sg_dma_address(sg);
3951
3952 if (len == 0)
3953 break;
3954
3955 while ((count + 1) < sgt->nents) {
3956 sg_next_iter = sg_next(sg);
3957 len_next = sg_dma_len(sg_next_iter);
3958 dma_addr_next = sg_dma_address(sg_next_iter);
3959
3960 if (len_next == 0)
3961 break;
3962
3963 if ((dma_addr + len == dma_addr_next) &&
3964 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3965 len += len_next;
3966 count++;
3967 sg = sg_next_iter;
3968 } else {
3969 break;
3970 }
3971 }
3972
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03003973 ctl = le32_to_cpu(user_dma_pkt->ctl);
3974 if (likely(dma_desc_cnt))
3975 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3976 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3977 new_dma_pkt->ctl = cpu_to_le32(ctl);
3978 new_dma_pkt->tsize = cpu_to_le32(len);
3979
3980 if (dir == DMA_TO_DEVICE) {
3981 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3982 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3983 } else {
3984 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3985 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3986 }
3987
3988 if (!user_memset)
3989 device_memory_addr += len;
3990 dma_desc_cnt++;
3991 new_dma_pkt++;
3992 }
3993
3994 if (!dma_desc_cnt) {
3995 dev_err(hdev->dev,
3996 "Error of 0 SG entries when patching DMA packet\n");
3997 return -EFAULT;
3998 }
3999
4000 /* Fix the last dma packet - wrcomp must be as user set it */
4001 new_dma_pkt--;
4002 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4003
4004 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4005
4006 return 0;
4007}
4008
4009static int gaudi_patch_cb(struct hl_device *hdev,
4010 struct hl_cs_parser *parser)
4011{
4012 u32 cb_parsed_length = 0;
4013 u32 cb_patched_cur_length = 0;
4014 int rc = 0;
4015
4016 /* cb_user_size is more than 0 so loop will always be executed */
4017 while (cb_parsed_length < parser->user_cb_size) {
4018 enum packet_id pkt_id;
4019 u16 pkt_size;
4020 u32 new_pkt_size = 0;
4021 struct gaudi_packet *user_pkt, *kernel_pkt;
4022
4023 user_pkt = (struct gaudi_packet *) (uintptr_t)
4024 (parser->user_cb->kernel_address + cb_parsed_length);
4025 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
4026 (parser->patched_cb->kernel_address +
4027 cb_patched_cur_length);
4028
4029 pkt_id = (enum packet_id) (
4030 (le64_to_cpu(user_pkt->header) &
4031 PACKET_HEADER_PACKET_ID_MASK) >>
4032 PACKET_HEADER_PACKET_ID_SHIFT);
4033
Ofir Bittonbc75be22020-07-30 14:56:38 +03004034 if (!validate_packet_id(pkt_id)) {
4035 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4036 rc = -EINVAL;
4037 break;
4038 }
4039
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004040 pkt_size = gaudi_packet_sizes[pkt_id];
4041 cb_parsed_length += pkt_size;
4042 if (cb_parsed_length > parser->user_cb_size) {
4043 dev_err(hdev->dev,
4044 "packet 0x%x is out of CB boundary\n", pkt_id);
4045 rc = -EINVAL;
4046 break;
4047 }
4048
4049 switch (pkt_id) {
4050 case PACKET_LIN_DMA:
4051 rc = gaudi_patch_dma_packet(hdev, parser,
4052 (struct packet_lin_dma *) user_pkt,
4053 (struct packet_lin_dma *) kernel_pkt,
4054 &new_pkt_size);
4055 cb_patched_cur_length += new_pkt_size;
4056 break;
4057
4058 case PACKET_MSG_PROT:
4059 dev_err(hdev->dev,
4060 "User not allowed to use MSG_PROT\n");
4061 rc = -EPERM;
4062 break;
4063
4064 case PACKET_CP_DMA:
4065 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4066 rc = -EPERM;
4067 break;
4068
4069 case PACKET_STOP:
4070 dev_err(hdev->dev, "User not allowed to use STOP\n");
4071 rc = -EPERM;
4072 break;
4073
4074 case PACKET_WREG_32:
4075 case PACKET_WREG_BULK:
4076 case PACKET_MSG_LONG:
4077 case PACKET_MSG_SHORT:
4078 case PACKET_REPEAT:
4079 case PACKET_FENCE:
4080 case PACKET_NOP:
4081 case PACKET_ARB_POINT:
4082 case PACKET_LOAD_AND_EXE:
4083 memcpy(kernel_pkt, user_pkt, pkt_size);
4084 cb_patched_cur_length += pkt_size;
4085 break;
4086
4087 default:
4088 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4089 pkt_id);
4090 rc = -EINVAL;
4091 break;
4092 }
4093
4094 if (rc)
4095 break;
4096 }
4097
4098 return rc;
4099}
4100
4101static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4102 struct hl_cs_parser *parser)
4103{
4104 u64 patched_cb_handle;
4105 u32 patched_cb_size;
4106 struct hl_cb *user_cb;
4107 int rc;
4108
4109 /*
4110 * The new CB should have space at the end for two MSG_PROT pkt:
4111 * 1. A packet that will act as a completion packet
4112 * 2. A packet that will generate MSI interrupt
4113 */
4114 parser->patched_cb_size = parser->user_cb_size +
4115 sizeof(struct packet_msg_prot) * 2;
4116
Ofir Bittona04b7cd2020-07-13 13:36:55 +03004117 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
4118 &patched_cb_handle, HL_KERNEL_ASID_ID, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004119
4120 if (rc) {
4121 dev_err(hdev->dev,
4122 "Failed to allocate patched CB for DMA CS %d\n",
4123 rc);
4124 return rc;
4125 }
4126
4127 patched_cb_handle >>= PAGE_SHIFT;
4128 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4129 (u32) patched_cb_handle);
4130 /* hl_cb_get should never fail here so use kernel WARN */
4131 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4132 (u32) patched_cb_handle);
4133 if (!parser->patched_cb) {
4134 rc = -EFAULT;
4135 goto out;
4136 }
4137
4138 /*
4139 * The check that parser->user_cb_size <= parser->user_cb->size was done
4140 * in validate_queue_index().
4141 */
4142 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4143 (void *) (uintptr_t) parser->user_cb->kernel_address,
4144 parser->user_cb_size);
4145
4146 patched_cb_size = parser->patched_cb_size;
4147
4148 /* Validate patched CB instead of user CB */
4149 user_cb = parser->user_cb;
4150 parser->user_cb = parser->patched_cb;
4151 rc = gaudi_validate_cb(hdev, parser, true);
4152 parser->user_cb = user_cb;
4153
4154 if (rc) {
4155 hl_cb_put(parser->patched_cb);
4156 goto out;
4157 }
4158
4159 if (patched_cb_size != parser->patched_cb_size) {
4160 dev_err(hdev->dev, "user CB size mismatch\n");
4161 hl_cb_put(parser->patched_cb);
4162 rc = -EINVAL;
4163 goto out;
4164 }
4165
4166out:
4167 /*
4168 * Always call cb destroy here because we still have 1 reference
4169 * to it by calling cb_get earlier. After the job will be completed,
4170 * cb_put will release it, but here we want to remove it from the
4171 * idr
4172 */
4173 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4174 patched_cb_handle << PAGE_SHIFT);
4175
4176 return rc;
4177}
4178
4179static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4180 struct hl_cs_parser *parser)
4181{
4182 u64 patched_cb_handle;
4183 int rc;
4184
4185 rc = gaudi_validate_cb(hdev, parser, false);
4186
4187 if (rc)
4188 goto free_userptr;
4189
Ofir Bittona04b7cd2020-07-13 13:36:55 +03004190 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
4191 &patched_cb_handle, HL_KERNEL_ASID_ID, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004192 if (rc) {
4193 dev_err(hdev->dev,
4194 "Failed to allocate patched CB for DMA CS %d\n", rc);
4195 goto free_userptr;
4196 }
4197
4198 patched_cb_handle >>= PAGE_SHIFT;
4199 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4200 (u32) patched_cb_handle);
4201 /* hl_cb_get should never fail here so use kernel WARN */
4202 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4203 (u32) patched_cb_handle);
4204 if (!parser->patched_cb) {
4205 rc = -EFAULT;
4206 goto out;
4207 }
4208
4209 rc = gaudi_patch_cb(hdev, parser);
4210
4211 if (rc)
4212 hl_cb_put(parser->patched_cb);
4213
4214out:
4215 /*
4216 * Always call cb destroy here because we still have 1 reference
4217 * to it by calling cb_get earlier. After the job will be completed,
4218 * cb_put will release it, but here we want to remove it from the
4219 * idr
4220 */
4221 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4222 patched_cb_handle << PAGE_SHIFT);
4223
4224free_userptr:
4225 if (rc)
4226 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4227 return rc;
4228}
4229
4230static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4231 struct hl_cs_parser *parser)
4232{
4233 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4234
4235 /* For internal queue jobs just check if CB address is valid */
4236 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4237 parser->user_cb_size,
4238 asic_prop->sram_user_base_address,
4239 asic_prop->sram_end_address))
4240 return 0;
4241
4242 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4243 parser->user_cb_size,
4244 asic_prop->dram_user_base_address,
4245 asic_prop->dram_end_address))
4246 return 0;
4247
4248 /* PMMU and HPMMU addresses are equal, check only one of them */
4249 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4250 parser->user_cb_size,
4251 asic_prop->pmmu.start_addr,
4252 asic_prop->pmmu.end_addr))
4253 return 0;
4254
4255 dev_err(hdev->dev,
4256 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4257 parser->user_cb, parser->user_cb_size);
4258
4259 return -EFAULT;
4260}
4261
4262static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4263{
4264 struct gaudi_device *gaudi = hdev->asic_specific;
4265
4266 if (parser->queue_type == QUEUE_TYPE_INT)
4267 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4268
4269 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4270 return gaudi_parse_cb_mmu(hdev, parser);
4271 else
4272 return gaudi_parse_cb_no_mmu(hdev, parser);
4273}
4274
4275static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4276 u64 kernel_address, u32 len,
4277 u64 cq_addr, u32 cq_val, u32 msi_vec,
4278 bool eb)
4279{
4280 struct gaudi_device *gaudi = hdev->asic_specific;
4281 struct packet_msg_prot *cq_pkt;
4282 u32 tmp;
4283
4284 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4285 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4286
Oded Gabbay65887292020-08-12 11:21:01 +03004287 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4288 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004289
4290 if (eb)
Oded Gabbay65887292020-08-12 11:21:01 +03004291 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004292
4293 cq_pkt->ctl = cpu_to_le32(tmp);
4294 cq_pkt->value = cpu_to_le32(cq_val);
4295 cq_pkt->addr = cpu_to_le64(cq_addr);
4296
4297 cq_pkt++;
4298
Oded Gabbay65887292020-08-12 11:21:01 +03004299 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4300 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004301 cq_pkt->ctl = cpu_to_le32(tmp);
4302 cq_pkt->value = cpu_to_le32(1);
4303
4304 if (!gaudi->multi_msi_mode)
4305 msi_vec = 0;
4306
4307 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4308}
4309
4310static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4311{
4312 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4313}
4314
4315static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4316 u32 size, u64 val)
4317{
4318 struct packet_lin_dma *lin_dma_pkt;
4319 struct hl_cs_job *job;
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03004320 u32 cb_size, ctl, err_cause;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004321 struct hl_cb *cb;
4322 int rc;
4323
Ofir Bittona04b7cd2020-07-13 13:36:55 +03004324 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004325 if (!cb)
4326 return -EFAULT;
4327
4328 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4329 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4330 cb_size = sizeof(*lin_dma_pkt);
4331
Oded Gabbay65887292020-08-12 11:21:01 +03004332 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4333 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4334 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4335 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4336 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4337
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004338 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4339 lin_dma_pkt->src_addr = cpu_to_le64(val);
4340 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4341 lin_dma_pkt->tsize = cpu_to_le32(size);
4342
4343 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4344 if (!job) {
4345 dev_err(hdev->dev, "Failed to allocate a new job\n");
4346 rc = -ENOMEM;
4347 goto release_cb;
4348 }
4349
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03004350 /* Verify DMA is OK */
4351 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4352 if (err_cause && !hdev->init_done) {
4353 dev_dbg(hdev->dev,
4354 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4355 err_cause);
4356 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4357 }
4358
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004359 job->id = 0;
4360 job->user_cb = cb;
4361 job->user_cb->cs_cnt++;
4362 job->user_cb_size = cb_size;
4363 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4364 job->patched_cb = job->user_cb;
4365 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4366
4367 hl_debugfs_add_job(hdev, job);
4368
4369 rc = gaudi_send_job_on_qman0(hdev, job);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004370 hl_debugfs_remove_job(hdev, job);
4371 kfree(job);
4372 cb->cs_cnt--;
4373
Moti Haimovskia9855a2d92020-06-24 19:40:57 +03004374 /* Verify DMA is OK */
4375 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4376 if (err_cause) {
4377 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4378 rc = -EIO;
4379 if (!hdev->init_done) {
4380 dev_dbg(hdev->dev,
4381 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4382 err_cause);
4383 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4384 }
4385 }
4386
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004387release_cb:
4388 hl_cb_put(cb);
4389 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4390
4391 return rc;
4392}
4393
4394static void gaudi_restore_sm_registers(struct hl_device *hdev)
4395{
4396 int i;
4397
4398 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4399 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4400 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4401 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4402 }
4403
4404 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4405 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4406 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4407 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4408 }
4409
4410 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4411
4412 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4413 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4414
4415 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4416
4417 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4418 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4419}
4420
4421static void gaudi_restore_dma_registers(struct hl_device *hdev)
4422{
4423 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4424 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4425 int i;
4426
4427 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4428 u64 sob_addr = CFG_BASE +
4429 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4430 (i * sob_delta);
4431 u32 dma_offset = i * DMA_CORE_OFFSET;
4432
4433 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4434 lower_32_bits(sob_addr));
4435 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4436 upper_32_bits(sob_addr));
4437 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4438
4439 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4440 * modified by the user for SRAM reduction
4441 */
4442 if (i > 1)
4443 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4444 0x00000001);
4445 }
4446}
4447
4448static void gaudi_restore_qm_registers(struct hl_device *hdev)
4449{
4450 u32 qman_offset;
4451 int i;
4452
4453 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4454 qman_offset = i * DMA_QMAN_OFFSET;
4455 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4456 }
4457
4458 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4459 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4460 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4461 }
4462
4463 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4464 qman_offset = i * TPC_QMAN_OFFSET;
4465 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4466 }
4467}
4468
/*
 * gaudi_restore_user_registers() - restore the sync manager, DMA and QMAN
 * registers, in that order.
 * @hdev: habanalabs device.
 */
static void gaudi_restore_user_registers(struct hl_device *hdev)
{
	gaudi_restore_sm_registers(hdev);
	gaudi_restore_dma_registers(hdev);
	gaudi_restore_qm_registers(hdev);
}
4475
4476static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4477{
4478 struct asic_fixed_properties *prop = &hdev->asic_prop;
4479 u64 addr = prop->sram_user_base_address;
4480 u32 size = hdev->pldm ? 0x10000 :
4481 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4482 u64 val = 0x7777777777777777ull;
4483 int rc;
4484
4485 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4486 if (rc) {
4487 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4488 return rc;
4489 }
4490
4491 gaudi_mmu_prepare(hdev, asid);
4492
4493 gaudi_restore_user_registers(hdev);
4494
4495 return 0;
4496}
4497
4498static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4499{
4500 struct asic_fixed_properties *prop = &hdev->asic_prop;
4501 struct gaudi_device *gaudi = hdev->asic_specific;
4502 u64 addr = prop->mmu_pgt_addr;
4503 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4504
4505 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4506 return 0;
4507
4508 return gaudi_memset_device_memory(hdev, addr, size, 0);
4509}
4510
/*
 * gaudi_restore_phase_topology() - intentionally empty on Gaudi.
 * @hdev: habanalabs device (unused).
 */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{
}
4515
4516static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4517{
4518 struct asic_fixed_properties *prop = &hdev->asic_prop;
4519 struct gaudi_device *gaudi = hdev->asic_specific;
4520 u64 hbm_bar_addr;
4521 int rc = 0;
4522
4523 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004524
4525 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4526 (hdev->clock_gating_mask &
4527 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4528
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004529 dev_err_ratelimited(hdev->dev,
4530 "Can't read register - clock gating is enabled!\n");
4531 rc = -EFAULT;
4532 } else {
4533 *val = RREG32(addr - CFG_BASE);
4534 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004535
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004536 } else if ((addr >= SRAM_BASE_ADDR) &&
4537 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4538 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4539 (addr - SRAM_BASE_ADDR));
4540 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4541 u64 bar_base_addr = DRAM_PHYS_BASE +
4542 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4543
4544 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4545 if (hbm_bar_addr != U64_MAX) {
4546 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4547 (addr - bar_base_addr));
4548
4549 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4550 hbm_bar_addr);
4551 }
4552 if (hbm_bar_addr == U64_MAX)
4553 rc = -EIO;
4554 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4555 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4556 } else {
4557 rc = -EFAULT;
4558 }
4559
4560 return rc;
4561}
4562
4563static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4564{
4565 struct asic_fixed_properties *prop = &hdev->asic_prop;
4566 struct gaudi_device *gaudi = hdev->asic_specific;
4567 u64 hbm_bar_addr;
4568 int rc = 0;
4569
4570 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004571
4572 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4573 (hdev->clock_gating_mask &
4574 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4575
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004576 dev_err_ratelimited(hdev->dev,
4577 "Can't write register - clock gating is enabled!\n");
4578 rc = -EFAULT;
4579 } else {
4580 WREG32(addr - CFG_BASE, val);
4581 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004582
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004583 } else if ((addr >= SRAM_BASE_ADDR) &&
4584 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4585 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4586 (addr - SRAM_BASE_ADDR));
4587 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4588 u64 bar_base_addr = DRAM_PHYS_BASE +
4589 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4590
4591 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4592 if (hbm_bar_addr != U64_MAX) {
4593 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4594 (addr - bar_base_addr));
4595
4596 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4597 hbm_bar_addr);
4598 }
4599 if (hbm_bar_addr == U64_MAX)
4600 rc = -EIO;
4601 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4602 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4603 } else {
4604 rc = -EFAULT;
4605 }
4606
4607 return rc;
4608}
4609
4610static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4611{
4612 struct asic_fixed_properties *prop = &hdev->asic_prop;
4613 struct gaudi_device *gaudi = hdev->asic_specific;
4614 u64 hbm_bar_addr;
4615 int rc = 0;
4616
4617 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004618
4619 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4620 (hdev->clock_gating_mask &
4621 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4622
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004623 dev_err_ratelimited(hdev->dev,
4624 "Can't read register - clock gating is enabled!\n");
4625 rc = -EFAULT;
4626 } else {
4627 u32 val_l = RREG32(addr - CFG_BASE);
4628 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4629
4630 *val = (((u64) val_h) << 32) | val_l;
4631 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004632
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004633 } else if ((addr >= SRAM_BASE_ADDR) &&
4634 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4635 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4636 (addr - SRAM_BASE_ADDR));
4637 } else if (addr <=
4638 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4639 u64 bar_base_addr = DRAM_PHYS_BASE +
4640 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4641
4642 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4643 if (hbm_bar_addr != U64_MAX) {
4644 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4645 (addr - bar_base_addr));
4646
4647 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4648 hbm_bar_addr);
4649 }
4650 if (hbm_bar_addr == U64_MAX)
4651 rc = -EIO;
4652 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4653 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4654 } else {
4655 rc = -EFAULT;
4656 }
4657
4658 return rc;
4659}
4660
4661static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4662{
4663 struct asic_fixed_properties *prop = &hdev->asic_prop;
4664 struct gaudi_device *gaudi = hdev->asic_specific;
4665 u64 hbm_bar_addr;
4666 int rc = 0;
4667
4668 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004669
4670 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4671 (hdev->clock_gating_mask &
4672 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4673
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004674 dev_err_ratelimited(hdev->dev,
4675 "Can't write register - clock gating is enabled!\n");
4676 rc = -EFAULT;
4677 } else {
4678 WREG32(addr - CFG_BASE, lower_32_bits(val));
4679 WREG32(addr + sizeof(u32) - CFG_BASE,
4680 upper_32_bits(val));
4681 }
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004682
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004683 } else if ((addr >= SRAM_BASE_ADDR) &&
4684 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4685 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4686 (addr - SRAM_BASE_ADDR));
4687 } else if (addr <=
4688 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4689 u64 bar_base_addr = DRAM_PHYS_BASE +
4690 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4691
4692 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4693 if (hbm_bar_addr != U64_MAX) {
4694 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4695 (addr - bar_base_addr));
4696
4697 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4698 hbm_bar_addr);
4699 }
4700 if (hbm_bar_addr == U64_MAX)
4701 rc = -EIO;
4702 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4703 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4704 } else {
4705 rc = -EFAULT;
4706 }
4707
4708 return rc;
4709}
4710
4711static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4712{
4713 struct gaudi_device *gaudi = hdev->asic_specific;
4714
4715 if (hdev->hard_reset_pending)
4716 return U64_MAX;
4717
4718 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4719 (addr - gaudi->hbm_bar_cur_addr));
4720}
4721
4722static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4723{
4724 struct gaudi_device *gaudi = hdev->asic_specific;
4725
4726 if (hdev->hard_reset_pending)
4727 return;
4728
4729 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4730 (addr - gaudi->hbm_bar_cur_addr));
4731}
4732
4733static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4734{
4735 /* mask to zero the MMBP and ASID bits */
4736 WREG32_AND(reg, ~0x7FF);
4737 WREG32_OR(reg, asid);
4738}
4739
4740static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4741{
4742 struct gaudi_device *gaudi = hdev->asic_specific;
4743
4744 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4745 return;
4746
4747 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4748 WARN(1, "asid %u is too big\n", asid);
4749 return;
4750 }
4751
4752 mutex_lock(&gaudi->clk_gate_mutex);
4753
4754 hdev->asic_funcs->disable_clock_gating(hdev);
4755
4756 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4757 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4758 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4759 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4760 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4761
4762 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4763 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4764 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4765 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4766 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4767
4768 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4769 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4770 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4771 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4772 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4773
4774 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4775 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4776 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4777 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4778 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4779
4780 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4781 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4782 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4783 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4784 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4785
4786 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4787 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4788 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4789 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4790 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4791
4792 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4793 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4794 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4795 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4796 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4797
4798 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4799 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4800 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4801 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4802 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4803
4804 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4805 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4806 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4807 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4808 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4809 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4810 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4811 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4812
4813 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4814 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4815 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4816 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4817 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4818 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4819 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4820
4821 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4822 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4823 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4824 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4825 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4826 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4827 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4828
4829 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4830 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4831 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4832 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4833 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4834 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4835 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4836
4837 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4838 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4839 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4840 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4841 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4842 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4843 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4844
4845 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4846 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4847 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4848 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4849 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4850 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4851 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4852
4853 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4854 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4855 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4856 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4857 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4858 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4859 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4860
4861 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4862 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4863 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4864 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4865 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4866 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4867 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4868
4869 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4870 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4871 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4872 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4873 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4874 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4875 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4876
4877 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4878 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4879 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4880 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4881 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4882 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4883 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4884 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4885 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4886 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4887
4888 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4889 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4890 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4891 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4892 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4893 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4894 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4895 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4896 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4897 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4898 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4899 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4900
4901 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4902 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4903
Oded Gabbaye38bfd32020-07-03 20:46:12 +03004904 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004905
4906 mutex_unlock(&gaudi->clk_gate_mutex);
4907}
4908
4909static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4910 struct hl_cs_job *job)
4911{
4912 struct packet_msg_prot *fence_pkt;
4913 u32 *fence_ptr;
4914 dma_addr_t fence_dma_addr;
4915 struct hl_cb *cb;
4916 u32 tmp, timeout, dma_offset;
4917 int rc;
4918
4919 if (hdev->pldm)
4920 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4921 else
4922 timeout = HL_DEVICE_TIMEOUT_USEC;
4923
4924 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4925 dev_err_ratelimited(hdev->dev,
4926 "Can't send driver job on QMAN0 because the device is not idle\n");
4927 return -EBUSY;
4928 }
4929
4930 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4931 &fence_dma_addr);
4932 if (!fence_ptr) {
4933 dev_err(hdev->dev,
4934 "Failed to allocate fence memory for QMAN0\n");
4935 return -ENOMEM;
4936 }
4937
4938 cb = job->patched_cb;
4939
4940 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4941 job->job_cb_size - sizeof(struct packet_msg_prot));
4942
Oded Gabbay65887292020-08-12 11:21:01 +03004943 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4944 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4945 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4946
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004947 fence_pkt->ctl = cpu_to_le32(tmp);
4948 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4949 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4950
4951 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4952
4953 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4954
4955 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4956 job->job_cb_size, cb->bus_address);
4957 if (rc) {
4958 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4959 goto free_fence_ptr;
4960 }
4961
4962 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4963 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4964 timeout, true);
4965
4966 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4967
4968 if (rc == -ETIMEDOUT) {
4969 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4970 goto free_fence_ptr;
4971 }
4972
4973free_fence_ptr:
4974 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4975 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4976
4977 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4978 fence_dma_addr);
4979 return rc;
4980}
4981
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004982static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4983{
Ofir Bittonebd8d122020-05-10 13:41:28 +03004984 if (event_type >= GAUDI_EVENT_SIZE)
4985 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004986
Ofir Bittonebd8d122020-05-10 13:41:28 +03004987 if (!gaudi_irq_map_table[event_type].valid)
4988 goto event_not_supported;
4989
4990 snprintf(desc, size, gaudi_irq_map_table[event_type].name);
4991
4992 return;
4993
4994event_not_supported:
4995 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004996}
4997
4998static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
4999 u32 x_y, bool is_write)
5000{
5001 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5002
5003 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5004 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5005
5006 switch (x_y) {
5007 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5008 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5009 dma_id[0] = 0;
5010 dma_id[1] = 2;
5011 break;
5012 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5013 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5014 dma_id[0] = 1;
5015 dma_id[1] = 3;
5016 break;
5017 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5018 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5019 dma_id[0] = 4;
5020 dma_id[1] = 6;
5021 break;
5022 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5023 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5024 dma_id[0] = 5;
5025 dma_id[1] = 7;
5026 break;
5027 default:
5028 goto unknown_initiator;
5029 }
5030
5031 for (i = 0 ; i < 2 ; i++) {
5032 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5033 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5034 }
5035
5036 switch (x_y) {
5037 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5038 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5039 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5040 return "DMA0";
5041 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5042 return "DMA2";
5043 else
5044 return "DMA0 or DMA2";
5045 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5046 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5047 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5048 return "DMA1";
5049 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5050 return "DMA3";
5051 else
5052 return "DMA1 or DMA3";
5053 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5054 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5055 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5056 return "DMA4";
5057 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5058 return "DMA6";
5059 else
5060 return "DMA4 or DMA6";
5061 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5062 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5063 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5064 return "DMA5";
5065 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5066 return "DMA7";
5067 else
5068 return "DMA5 or DMA7";
5069 }
5070
5071unknown_initiator:
5072 return "unknown initiator";
5073}
5074
5075static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5076 bool is_write)
5077{
5078 u32 val, x_y, axi_id;
5079
5080 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5081 RREG32(mmMMU_UP_RAZWI_READ_ID);
5082 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5083 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5084 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5085 RAZWI_INITIATOR_AXI_ID_SHIFT);
5086
5087 switch (x_y) {
5088 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5089 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5090 return "TPC0";
5091 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5092 return "NIC0";
5093 break;
5094 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5095 return "TPC1";
5096 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5097 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5098 return "MME0";
5099 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5100 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5101 return "MME1";
5102 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5103 return "TPC2";
5104 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5105 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5106 return "TPC3";
5107 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5108 return "PCI";
5109 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5110 return "CPU";
5111 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5112 return "PSOC";
5113 break;
5114 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5115 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5116 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5117 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5118 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5119 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5120 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5121 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5122 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5123 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5124 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5125 return "TPC4";
5126 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5127 return "NIC1";
5128 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5129 return "NIC2";
5130 break;
5131 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5132 return "TPC5";
5133 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5134 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5135 return "MME2";
5136 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5137 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5138 return "MME3";
5139 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5140 return "TPC6";
5141 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5142 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5143 return "TPC7";
5144 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5145 return "NIC4";
5146 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5147 return "NIC5";
5148 break;
5149 default:
5150 break;
5151 }
5152
5153 dev_err(hdev->dev,
5154 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5155 val,
5156 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5157 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5158 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5159 RAZWI_INITIATOR_AXI_ID_MASK);
5160
5161 return "unknown initiator";
5162}
5163
5164static void gaudi_print_razwi_info(struct hl_device *hdev)
5165{
5166 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5167 dev_err_ratelimited(hdev->dev,
5168 "RAZWI event caused by illegal write of %s\n",
5169 gaudi_get_razwi_initiator_name(hdev, true));
5170 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5171 }
5172
5173 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5174 dev_err_ratelimited(hdev->dev,
5175 "RAZWI event caused by illegal read of %s\n",
5176 gaudi_get_razwi_initiator_name(hdev, false));
5177 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5178 }
5179}
5180
5181static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5182{
5183 struct gaudi_device *gaudi = hdev->asic_specific;
5184 u64 addr;
5185 u32 val;
5186
5187 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5188 return;
5189
5190 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5191 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5192 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5193 addr <<= 32;
5194 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5195
5196 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5197 addr);
5198
5199 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5200 }
5201
5202 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5203 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5204 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5205 addr <<= 32;
5206 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5207
5208 dev_err_ratelimited(hdev->dev,
5209 "MMU access error on va 0x%llx\n", addr);
5210
5211 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5212 }
5213}
5214
5215/*
5216 * +-------------------+------------------------------------------------------+
5217 * | Configuration Reg | Description |
5218 * | Address | |
5219 * +-------------------+------------------------------------------------------+
5220 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5221 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5222 * | |0xF34 memory wrappers 63:32 |
5223 * | |0xF38 memory wrappers 95:64 |
5224 * | |0xF3C memory wrappers 127:96 |
5225 * +-------------------+------------------------------------------------------+
5226 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5227 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5228 * | |0xF44 memory wrappers 63:32 |
5229 * | |0xF48 memory wrappers 95:64 |
5230 * | |0xF4C memory wrappers 127:96 |
5231 * +-------------------+------------------------------------------------------+
5232 */
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005233static int gaudi_extract_ecc_info(struct hl_device *hdev,
5234 struct ecc_info_extract_params *params, u64 *ecc_address,
5235 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005236{
5237 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005238 u32 i, num_mem_regs, reg, err_bit;
5239 u64 err_addr, err_word = 0;
5240 int rc = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005241
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005242 num_mem_regs = params->num_memories / 32 +
5243 ((params->num_memories % 32) ? 1 : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005244
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005245 if (params->block_address >= CFG_BASE)
5246 params->block_address -= CFG_BASE;
5247
5248 if (params->derr)
5249 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005250 else
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005251 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005252
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005253 if (params->disable_clock_gating) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005254 mutex_lock(&gaudi->clk_gate_mutex);
5255 hdev->asic_funcs->disable_clock_gating(hdev);
5256 }
5257
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005258 /* Set invalid wrapper index */
5259 *memory_wrapper_idx = 0xFF;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005260
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005261 /* Iterate through memory wrappers, a single bit must be set */
Dan Carpenterb0353542020-08-05 12:51:05 +03005262 for (i = 0 ; i < num_mem_regs ; i++) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005263 err_addr += i * 4;
5264 err_word = RREG32(err_addr);
5265 if (err_word) {
5266 err_bit = __ffs(err_word);
5267 *memory_wrapper_idx = err_bit + (32 * i);
5268 break;
5269 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005270 }
5271
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005272 if (*memory_wrapper_idx == 0xFF) {
5273 dev_err(hdev->dev, "ECC error information cannot be found\n");
5274 rc = -EINVAL;
5275 goto enable_clk_gate;
5276 }
5277
5278 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5279 *memory_wrapper_idx);
5280
5281 *ecc_address =
5282 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5283 *ecc_syndrom =
5284 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5285
5286 /* Clear error indication */
5287 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5288 if (params->derr)
5289 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5290 else
5291 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5292
5293 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5294
5295enable_clk_gate:
5296 if (params->disable_clock_gating) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005297 hdev->asic_funcs->set_clock_gating(hdev);
Greg Kroah-Hartman65a9bde62020-07-27 11:49:37 +02005298
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005299 mutex_unlock(&gaudi->clk_gate_mutex);
5300 }
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005301
5302 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005303}
5304
5305static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5306 const char *qm_name,
5307 u64 glbl_sts_addr,
5308 u64 arb_err_addr)
5309{
5310 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5311 char reg_desc[32];
5312
5313 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5314 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5315 glbl_sts_clr_val = 0;
5316 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5317
5318 if (!glbl_sts_val)
5319 continue;
5320
5321 if (i == QMAN_STREAMS)
5322 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5323 else
5324 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5325
5326 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5327 if (glbl_sts_val & BIT(j)) {
5328 dev_err_ratelimited(hdev->dev,
5329 "%s %s. err cause: %s\n",
5330 qm_name, reg_desc,
5331 gaudi_qman_error_cause[j]);
5332 glbl_sts_clr_val |= BIT(j);
5333 }
5334 }
5335
5336 /* Write 1 clear errors */
5337 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5338 }
5339
5340 arb_err_val = RREG32(arb_err_addr);
5341
5342 if (!arb_err_val)
5343 return;
5344
5345 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5346 if (arb_err_val & BIT(j)) {
5347 dev_err_ratelimited(hdev->dev,
5348 "%s ARB_ERR. err cause: %s\n",
5349 qm_name,
5350 gaudi_qman_arb_error_cause[j]);
5351 }
5352 }
5353}
5354
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005355static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5356 struct hl_eq_ecc_data *ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005357{
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005358 struct ecc_info_extract_params params;
5359 u64 ecc_address = 0, ecc_syndrom = 0;
5360 u8 index, memory_wrapper_idx = 0;
5361 bool extract_info_from_fw;
5362 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005363
5364 switch (event_type) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005365 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5366 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5367 extract_info_from_fw = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005368 break;
5369 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5370 index = event_type - GAUDI_EVENT_TPC0_SERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005371 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5372 params.num_memories = 90;
5373 params.derr = false;
5374 params.disable_clock_gating = true;
5375 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005376 break;
5377 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5378 index = event_type - GAUDI_EVENT_TPC0_DERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005379 params.block_address =
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005380 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005381 params.num_memories = 90;
5382 params.derr = true;
5383 params.disable_clock_gating = true;
5384 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005385 break;
5386 case GAUDI_EVENT_MME0_ACC_SERR:
5387 case GAUDI_EVENT_MME1_ACC_SERR:
5388 case GAUDI_EVENT_MME2_ACC_SERR:
5389 case GAUDI_EVENT_MME3_ACC_SERR:
5390 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005391 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5392 params.num_memories = 128;
5393 params.derr = false;
5394 params.disable_clock_gating = true;
5395 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005396 break;
5397 case GAUDI_EVENT_MME0_ACC_DERR:
5398 case GAUDI_EVENT_MME1_ACC_DERR:
5399 case GAUDI_EVENT_MME2_ACC_DERR:
5400 case GAUDI_EVENT_MME3_ACC_DERR:
5401 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005402 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5403 params.num_memories = 128;
5404 params.derr = true;
5405 params.disable_clock_gating = true;
5406 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005407 break;
5408 case GAUDI_EVENT_MME0_SBAB_SERR:
5409 case GAUDI_EVENT_MME1_SBAB_SERR:
5410 case GAUDI_EVENT_MME2_SBAB_SERR:
5411 case GAUDI_EVENT_MME3_SBAB_SERR:
5412 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005413 params.block_address =
5414 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5415 params.num_memories = 33;
5416 params.derr = false;
5417 params.disable_clock_gating = true;
5418 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005419 break;
5420 case GAUDI_EVENT_MME0_SBAB_DERR:
5421 case GAUDI_EVENT_MME1_SBAB_DERR:
5422 case GAUDI_EVENT_MME2_SBAB_DERR:
5423 case GAUDI_EVENT_MME3_SBAB_DERR:
5424 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005425 params.block_address =
5426 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5427 params.num_memories = 33;
5428 params.derr = true;
5429 params.disable_clock_gating = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005430 default:
5431 return;
5432 }
5433
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005434 if (extract_info_from_fw) {
5435 ecc_address = le64_to_cpu(ecc_data->ecc_address);
5436 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5437 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5438 } else {
5439 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5440 &ecc_syndrom, &memory_wrapper_idx);
5441 if (rc)
5442 return;
5443 }
5444
5445 dev_err(hdev->dev,
5446 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
5447 ecc_address, ecc_syndrom, memory_wrapper_idx);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005448}
5449
5450static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5451{
5452 u64 glbl_sts_addr, arb_err_addr;
5453 u8 index;
5454 char desc[32];
5455
5456 switch (event_type) {
5457 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5458 index = event_type - GAUDI_EVENT_TPC0_QM;
5459 glbl_sts_addr =
5460 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5461 arb_err_addr =
5462 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5463 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5464 break;
5465 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5466 index = event_type - GAUDI_EVENT_MME0_QM;
5467 glbl_sts_addr =
5468 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5469 arb_err_addr =
5470 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5471 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5472 break;
5473 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5474 index = event_type - GAUDI_EVENT_DMA0_QM;
5475 glbl_sts_addr =
5476 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5477 arb_err_addr =
5478 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5479 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5480 break;
5481 default:
5482 return;
5483 }
5484
5485 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5486}
5487
5488static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5489 bool razwi)
5490{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005491 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005492
5493 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5494 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5495 event_type, desc);
5496
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005497 if (razwi) {
5498 gaudi_print_razwi_info(hdev);
5499 gaudi_print_mmu_error_info(hdev);
5500 }
5501}
5502
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005503static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5504{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005505 struct gaudi_device *gaudi = hdev->asic_specific;
5506
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005507 /* Unmask all IRQs since some could have been received
5508 * during the soft reset
5509 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03005510 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005511}
5512
5513static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5514{
5515 int ch, err = 0;
5516 u32 base, val, val2;
5517
5518 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5519 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5520 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5521 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5522 if (val) {
5523 err = 1;
5524 dev_err(hdev->dev,
5525 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5526 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5527 (val >> 2) & 0x1, (val >> 3) & 0x1,
5528 (val >> 4) & 0x1);
5529
5530 val2 = RREG32(base + ch * 0x1000 + 0x060);
5531 dev_err(hdev->dev,
5532 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5533 device, ch * 2,
5534 RREG32(base + ch * 0x1000 + 0x064),
5535 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5536 (val2 & 0xFF0000) >> 16,
5537 (val2 & 0xFF000000) >> 24);
5538 }
5539
5540 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5541 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5542 if (val) {
5543 err = 1;
5544 dev_err(hdev->dev,
5545 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5546 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5547 (val >> 2) & 0x1, (val >> 3) & 0x1,
5548 (val >> 4) & 0x1);
5549
5550 val2 = RREG32(base + ch * 0x1000 + 0x070);
5551 dev_err(hdev->dev,
5552 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5553 device, ch * 2 + 1,
5554 RREG32(base + ch * 0x1000 + 0x074),
5555 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5556 (val2 & 0xFF0000) >> 16,
5557 (val2 & 0xFF000000) >> 24);
5558 }
5559
5560 /* Clear interrupts */
5561 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5562 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5563 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5564 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5565 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5566 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5567 }
5568
5569 val = RREG32(base + 0x8F30);
5570 val2 = RREG32(base + 0x8F34);
5571 if (val | val2) {
5572 err = 1;
5573 dev_err(hdev->dev,
5574 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5575 device, val, val2);
5576 }
5577 val = RREG32(base + 0x8F40);
5578 val2 = RREG32(base + 0x8F44);
5579 if (val | val2) {
5580 err = 1;
5581 dev_err(hdev->dev,
5582 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5583 device, val, val2);
5584 }
5585
5586 return err;
5587}
5588
5589static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5590{
5591 switch (hbm_event_type) {
5592 case GAUDI_EVENT_HBM0_SPI_0:
5593 case GAUDI_EVENT_HBM0_SPI_1:
5594 return 0;
5595 case GAUDI_EVENT_HBM1_SPI_0:
5596 case GAUDI_EVENT_HBM1_SPI_1:
5597 return 1;
5598 case GAUDI_EVENT_HBM2_SPI_0:
5599 case GAUDI_EVENT_HBM2_SPI_1:
5600 return 2;
5601 case GAUDI_EVENT_HBM3_SPI_0:
5602 case GAUDI_EVENT_HBM3_SPI_1:
5603 return 3;
5604 default:
5605 break;
5606 }
5607
5608 /* Should never happen */
5609 return 0;
5610}
5611
5612static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5613 char *interrupt_name)
5614{
5615 struct gaudi_device *gaudi = hdev->asic_specific;
5616 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5617 bool soft_reset_required = false;
5618
5619 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5620 * gating, and thus cannot be done in ArmCP and should be done instead
5621 * by the driver.
5622 */
5623
5624 mutex_lock(&gaudi->clk_gate_mutex);
5625
5626 hdev->asic_funcs->disable_clock_gating(hdev);
5627
5628 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5629 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5630
5631 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5632 if (tpc_interrupts_cause & BIT(i)) {
5633 dev_err_ratelimited(hdev->dev,
5634 "TPC%d_%s interrupt cause: %s\n",
5635 tpc_id, interrupt_name,
5636 gaudi_tpc_interrupts_cause[i]);
5637 /* If this is QM error, we need to soft-reset */
5638 if (i == 15)
5639 soft_reset_required = true;
5640 }
5641
5642 /* Clear interrupts */
5643 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5644
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005645 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005646
5647 mutex_unlock(&gaudi->clk_gate_mutex);
5648
5649 return soft_reset_required;
5650}
5651
5652static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5653{
5654 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5655}
5656
5657static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5658{
5659 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5660}
5661
5662static void gaudi_print_clk_change_info(struct hl_device *hdev,
5663 u16 event_type)
5664{
5665 switch (event_type) {
5666 case GAUDI_EVENT_FIX_POWER_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005667 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005668 dev_info_ratelimited(hdev->dev,
5669 "Clock throttling due to power consumption\n");
5670 break;
5671
5672 case GAUDI_EVENT_FIX_POWER_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005673 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005674 dev_info_ratelimited(hdev->dev,
5675 "Power envelop is safe, back to optimal clock\n");
5676 break;
5677
5678 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005679 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005680 dev_info_ratelimited(hdev->dev,
5681 "Clock throttling due to overheating\n");
5682 break;
5683
5684 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005685 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005686 dev_info_ratelimited(hdev->dev,
5687 "Thermal envelop is safe, back to optimal clock\n");
5688 break;
5689
5690 default:
5691 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5692 event_type);
5693 break;
5694 }
5695}
5696
5697static void gaudi_handle_eqe(struct hl_device *hdev,
5698 struct hl_eq_entry *eq_entry)
5699{
5700 struct gaudi_device *gaudi = hdev->asic_specific;
5701 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5702 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5703 >> EQ_CTL_EVENT_TYPE_SHIFT);
5704 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03005705 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005706
5707 gaudi->events_stat[event_type]++;
5708 gaudi->events_stat_aggregate[event_type]++;
5709
5710 switch (event_type) {
5711 case GAUDI_EVENT_PCIE_CORE_DERR:
5712 case GAUDI_EVENT_PCIE_IF_DERR:
5713 case GAUDI_EVENT_PCIE_PHY_DERR:
5714 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5715 case GAUDI_EVENT_MME0_ACC_DERR:
5716 case GAUDI_EVENT_MME0_SBAB_DERR:
5717 case GAUDI_EVENT_MME1_ACC_DERR:
5718 case GAUDI_EVENT_MME1_SBAB_DERR:
5719 case GAUDI_EVENT_MME2_ACC_DERR:
5720 case GAUDI_EVENT_MME2_SBAB_DERR:
5721 case GAUDI_EVENT_MME3_ACC_DERR:
5722 case GAUDI_EVENT_MME3_SBAB_DERR:
5723 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5724 fallthrough;
5725 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5726 case GAUDI_EVENT_PSOC_MEM_DERR:
5727 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5728 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5729 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005730 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5731 case GAUDI_EVENT_MMU_DERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005732 gaudi_print_irq_info(hdev, event_type, true);
5733 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5734 if (hdev->hard_reset_on_fw_events)
5735 hl_device_reset(hdev, true, false);
5736 break;
5737
5738 case GAUDI_EVENT_GIC500:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005739 case GAUDI_EVENT_AXI_ECC:
5740 case GAUDI_EVENT_L2_RAM_ECC:
5741 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5742 gaudi_print_irq_info(hdev, event_type, false);
5743 if (hdev->hard_reset_on_fw_events)
5744 hl_device_reset(hdev, true, false);
5745 break;
5746
5747 case GAUDI_EVENT_HBM0_SPI_0:
5748 case GAUDI_EVENT_HBM1_SPI_0:
5749 case GAUDI_EVENT_HBM2_SPI_0:
5750 case GAUDI_EVENT_HBM3_SPI_0:
5751 gaudi_print_irq_info(hdev, event_type, false);
5752 gaudi_hbm_read_interrupts(hdev,
5753 gaudi_hbm_event_to_dev(event_type));
5754 if (hdev->hard_reset_on_fw_events)
5755 hl_device_reset(hdev, true, false);
5756 break;
5757
5758 case GAUDI_EVENT_HBM0_SPI_1:
5759 case GAUDI_EVENT_HBM1_SPI_1:
5760 case GAUDI_EVENT_HBM2_SPI_1:
5761 case GAUDI_EVENT_HBM3_SPI_1:
5762 gaudi_print_irq_info(hdev, event_type, false);
5763 gaudi_hbm_read_interrupts(hdev,
5764 gaudi_hbm_event_to_dev(event_type));
5765 break;
5766
5767 case GAUDI_EVENT_TPC0_DEC:
5768 case GAUDI_EVENT_TPC1_DEC:
5769 case GAUDI_EVENT_TPC2_DEC:
5770 case GAUDI_EVENT_TPC3_DEC:
5771 case GAUDI_EVENT_TPC4_DEC:
5772 case GAUDI_EVENT_TPC5_DEC:
5773 case GAUDI_EVENT_TPC6_DEC:
5774 case GAUDI_EVENT_TPC7_DEC:
5775 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005776 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005777 tpc_dec_event_to_tpc_id(event_type),
5778 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03005779 if (reset_required) {
5780 dev_err(hdev->dev, "hard reset required due to %s\n",
5781 gaudi_irq_map_table[event_type].name);
5782
5783 if (hdev->hard_reset_on_fw_events)
5784 hl_device_reset(hdev, true, false);
5785 } else {
5786 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005787 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005788 break;
5789
5790 case GAUDI_EVENT_TPC0_KRN_ERR:
5791 case GAUDI_EVENT_TPC1_KRN_ERR:
5792 case GAUDI_EVENT_TPC2_KRN_ERR:
5793 case GAUDI_EVENT_TPC3_KRN_ERR:
5794 case GAUDI_EVENT_TPC4_KRN_ERR:
5795 case GAUDI_EVENT_TPC5_KRN_ERR:
5796 case GAUDI_EVENT_TPC6_KRN_ERR:
5797 case GAUDI_EVENT_TPC7_KRN_ERR:
5798 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005799 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005800 tpc_krn_event_to_tpc_id(event_type),
5801 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03005802 if (reset_required) {
5803 dev_err(hdev->dev, "hard reset required due to %s\n",
5804 gaudi_irq_map_table[event_type].name);
5805
5806 if (hdev->hard_reset_on_fw_events)
5807 hl_device_reset(hdev, true, false);
5808 } else {
5809 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005810 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005811 break;
5812
5813 case GAUDI_EVENT_PCIE_CORE_SERR:
5814 case GAUDI_EVENT_PCIE_IF_SERR:
5815 case GAUDI_EVENT_PCIE_PHY_SERR:
5816 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5817 case GAUDI_EVENT_MME0_ACC_SERR:
5818 case GAUDI_EVENT_MME0_SBAB_SERR:
5819 case GAUDI_EVENT_MME1_ACC_SERR:
5820 case GAUDI_EVENT_MME1_SBAB_SERR:
5821 case GAUDI_EVENT_MME2_ACC_SERR:
5822 case GAUDI_EVENT_MME2_SBAB_SERR:
5823 case GAUDI_EVENT_MME3_ACC_SERR:
5824 case GAUDI_EVENT_MME3_SBAB_SERR:
5825 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5826 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5827 case GAUDI_EVENT_PSOC_MEM_SERR:
5828 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5829 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5830 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5831 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5832 fallthrough;
5833 case GAUDI_EVENT_MMU_SERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005834 gaudi_print_irq_info(hdev, event_type, true);
5835 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5836 hl_fw_unmask_irq(hdev, event_type);
5837 break;
5838
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005839 case GAUDI_EVENT_PCIE_DEC:
5840 case GAUDI_EVENT_MME0_WBC_RSP:
5841 case GAUDI_EVENT_MME0_SBAB0_RSP:
5842 case GAUDI_EVENT_MME1_WBC_RSP:
5843 case GAUDI_EVENT_MME1_SBAB0_RSP:
5844 case GAUDI_EVENT_MME2_WBC_RSP:
5845 case GAUDI_EVENT_MME2_SBAB0_RSP:
5846 case GAUDI_EVENT_MME3_WBC_RSP:
5847 case GAUDI_EVENT_MME3_SBAB0_RSP:
5848 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5849 case GAUDI_EVENT_PSOC_AXI_DEC:
5850 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5851 case GAUDI_EVENT_MMU_PAGE_FAULT:
5852 case GAUDI_EVENT_MMU_WR_PERM:
5853 case GAUDI_EVENT_RAZWI_OR_ADC:
5854 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5855 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5856 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5857 fallthrough;
5858 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5859 gaudi_print_irq_info(hdev, event_type, true);
5860 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005861 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005862 break;
5863
5864 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5865 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005866 if (hdev->hard_reset_on_fw_events)
5867 hl_device_reset(hdev, true, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005868 break;
5869
5870 case GAUDI_EVENT_TPC0_BMON_SPMU:
5871 case GAUDI_EVENT_TPC1_BMON_SPMU:
5872 case GAUDI_EVENT_TPC2_BMON_SPMU:
5873 case GAUDI_EVENT_TPC3_BMON_SPMU:
5874 case GAUDI_EVENT_TPC4_BMON_SPMU:
5875 case GAUDI_EVENT_TPC5_BMON_SPMU:
5876 case GAUDI_EVENT_TPC6_BMON_SPMU:
5877 case GAUDI_EVENT_TPC7_BMON_SPMU:
5878 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5879 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005880 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005881 break;
5882
5883 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5884 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005885 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005886 break;
5887
5888 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5889 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5890 dev_err(hdev->dev,
5891 "Received high temp H/W interrupt %d (cause %d)\n",
5892 event_type, cause);
5893 break;
5894
5895 default:
5896 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5897 event_type);
5898 break;
5899 }
5900}
5901
5902static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5903 u32 *size)
5904{
5905 struct gaudi_device *gaudi = hdev->asic_specific;
5906
5907 if (aggregate) {
5908 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5909 return gaudi->events_stat_aggregate;
5910 }
5911
5912 *size = (u32) sizeof(gaudi->events_stat);
5913 return gaudi->events_stat;
5914}
5915
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005916static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005917 u32 flags)
5918{
5919 struct gaudi_device *gaudi = hdev->asic_specific;
5920 u32 status, timeout_usec;
5921 int rc;
5922
5923 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5924 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005925 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005926
5927 if (hdev->pldm)
5928 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5929 else
5930 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5931
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005932 mutex_lock(&hdev->mmu_cache_lock);
5933
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005934 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03005935 WREG32(mmSTLB_INV_PS, 3);
5936 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005937 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005938
5939 rc = hl_poll_timeout(
5940 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005941 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005942 status,
5943 !status,
5944 1000,
5945 timeout_usec);
5946
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005947 WREG32(mmSTLB_INV_SET, 0);
5948
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005949 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005950
5951 if (rc) {
5952 dev_err_ratelimited(hdev->dev,
5953 "MMU cache invalidation timeout\n");
5954 hl_device_reset(hdev, true, false);
5955 }
5956
5957 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005958}
5959
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005960static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005961 bool is_hard, u32 asid, u64 va, u64 size)
5962{
5963 struct gaudi_device *gaudi = hdev->asic_specific;
5964 u32 status, timeout_usec;
5965 u32 inv_data;
5966 u32 pi;
5967 int rc;
5968
5969 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5970 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005971 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005972
5973 mutex_lock(&hdev->mmu_cache_lock);
5974
5975 if (hdev->pldm)
5976 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5977 else
5978 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5979
5980 /*
5981 * TODO: currently invalidate entire L0 & L1 as in regular hard
5982 * invalidation. Need to apply invalidation of specific cache
5983 * lines with mask of ASID & VA & size.
5984 * Note that L1 with be flushed entirely in any case.
5985 */
5986
5987 /* L0 & L1 invalidation */
5988 inv_data = RREG32(mmSTLB_CACHE_INV);
5989 /* PI is 8 bit */
5990 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5991 WREG32(mmSTLB_CACHE_INV,
5992 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5993
5994 rc = hl_poll_timeout(
5995 hdev,
5996 mmSTLB_INV_CONSUMER_INDEX,
5997 status,
5998 status == pi,
5999 1000,
6000 timeout_usec);
6001
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006002 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03006003
6004 if (rc) {
6005 dev_err_ratelimited(hdev->dev,
6006 "MMU cache invalidation timeout\n");
6007 hl_device_reset(hdev, true, false);
6008 }
6009
6010 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006011}
6012
6013static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6014 u32 asid, u64 phys_addr)
6015{
6016 u32 status, timeout_usec;
6017 int rc;
6018
6019 if (hdev->pldm)
6020 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6021 else
6022 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6023
6024 WREG32(MMU_ASID, asid);
6025 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6026 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6027 WREG32(MMU_BUSY, 0x80000000);
6028
6029 rc = hl_poll_timeout(
6030 hdev,
6031 MMU_BUSY,
6032 status,
6033 !(status & 0x80000000),
6034 1000,
6035 timeout_usec);
6036
6037 if (rc) {
6038 dev_err(hdev->dev,
6039 "Timeout during MMU hop0 config of asid %d\n", asid);
6040 return rc;
6041 }
6042
6043 return 0;
6044}
6045
6046static int gaudi_send_heartbeat(struct hl_device *hdev)
6047{
6048 struct gaudi_device *gaudi = hdev->asic_specific;
6049
6050 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6051 return 0;
6052
6053 return hl_fw_send_heartbeat(hdev);
6054}
6055
6056static int gaudi_armcp_info_get(struct hl_device *hdev)
6057{
6058 struct gaudi_device *gaudi = hdev->asic_specific;
6059 struct asic_fixed_properties *prop = &hdev->asic_prop;
6060 int rc;
6061
6062 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6063 return 0;
6064
6065 rc = hl_fw_armcp_info_get(hdev);
6066 if (rc)
6067 return rc;
6068
6069 if (!strlen(prop->armcp_info.card_name))
6070 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6071 CARD_NAME_MAX_LEN);
6072
Oded Gabbay58361aa2020-08-08 23:34:47 +03006073 hdev->card_type = le32_to_cpu(hdev->asic_prop.armcp_info.card_type);
6074
6075 if (hdev->card_type == armcp_card_type_pci)
6076 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6077 else if (hdev->card_type == armcp_card_type_pmc)
6078 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6079
6080 hdev->max_power = prop->max_power_default;
6081
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006082 return 0;
6083}
6084
farah kassabrid90416c2020-08-12 17:20:13 +03006085static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006086 struct seq_file *s)
6087{
6088 struct gaudi_device *gaudi = hdev->asic_specific;
6089 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6090 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6091 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6092 bool is_idle = true, is_eng_idle, is_slave;
6093 u64 offset;
6094 int i, dma_id;
6095
6096 mutex_lock(&gaudi->clk_gate_mutex);
6097
6098 hdev->asic_funcs->disable_clock_gating(hdev);
6099
6100 if (s)
6101 seq_puts(s,
6102 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6103 "--- ------- ------------ ---------- -------------\n");
6104
6105 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6106 dma_id = gaudi_dma_assignment[i];
6107 offset = dma_id * DMA_QMAN_OFFSET;
6108
6109 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6110 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6111 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6112 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6113 IS_DMA_IDLE(dma_core_sts0);
6114 is_idle &= is_eng_idle;
6115
6116 if (mask)
6117 *mask |= !is_eng_idle <<
6118 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6119 if (s)
6120 seq_printf(s, fmt, dma_id,
6121 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6122 qm_cgm_sts, dma_core_sts0);
6123 }
6124
6125 if (s)
6126 seq_puts(s,
6127 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6128 "--- ------- ------------ ---------- ----------\n");
6129
6130 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6131 offset = i * TPC_QMAN_OFFSET;
6132 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6133 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6134 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6135 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6136 IS_TPC_IDLE(tpc_cfg_sts);
6137 is_idle &= is_eng_idle;
6138
6139 if (mask)
6140 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
6141 if (s)
6142 seq_printf(s, fmt, i,
6143 is_eng_idle ? "Y" : "N",
6144 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6145 }
6146
6147 if (s)
6148 seq_puts(s,
6149 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6150 "--- ------- ------------ ---------- -----------\n");
6151
6152 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6153 offset = i * MME_QMAN_OFFSET;
6154 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6155 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6156
6157 /* MME 1 & 3 are slaves, no need to check their QMANs */
6158 is_slave = i % 2;
6159 if (!is_slave) {
6160 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6161 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6162 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6163 }
6164
6165 is_idle &= is_eng_idle;
6166
6167 if (mask)
6168 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
6169 if (s) {
6170 if (!is_slave)
6171 seq_printf(s, fmt, i,
6172 is_eng_idle ? "Y" : "N",
6173 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6174 else
6175 seq_printf(s, mme_slave_fmt, i,
6176 is_eng_idle ? "Y" : "N", "-",
6177 "-", mme_arch_sts);
6178 }
6179 }
6180
6181 if (s)
6182 seq_puts(s, "\n");
6183
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006184 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006185
6186 mutex_unlock(&gaudi->clk_gate_mutex);
6187
6188 return is_idle;
6189}
6190
6191static void gaudi_hw_queues_lock(struct hl_device *hdev)
6192 __acquires(&gaudi->hw_queues_lock)
6193{
6194 struct gaudi_device *gaudi = hdev->asic_specific;
6195
6196 spin_lock(&gaudi->hw_queues_lock);
6197}
6198
6199static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6200 __releases(&gaudi->hw_queues_lock)
6201{
6202 struct gaudi_device *gaudi = hdev->asic_specific;
6203
6204 spin_unlock(&gaudi->hw_queues_lock);
6205}
6206
6207static u32 gaudi_get_pci_id(struct hl_device *hdev)
6208{
6209 return hdev->pdev->device;
6210}
6211
6212static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6213 size_t max_size)
6214{
6215 struct gaudi_device *gaudi = hdev->asic_specific;
6216
6217 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6218 return 0;
6219
6220 return hl_fw_get_eeprom_data(hdev, data, max_size);
6221}
6222
6223/*
6224 * this function should be used only during initialization and/or after reset,
6225 * when there are no active users.
6226 */
6227static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6228 u32 tpc_id)
6229{
6230 struct gaudi_device *gaudi = hdev->asic_specific;
6231 u64 kernel_timeout;
6232 u32 status, offset;
6233 int rc;
6234
6235 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6236
6237 if (hdev->pldm)
6238 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6239 else
6240 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6241
6242 mutex_lock(&gaudi->clk_gate_mutex);
6243
6244 hdev->asic_funcs->disable_clock_gating(hdev);
6245
6246 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6247 lower_32_bits(tpc_kernel));
6248 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6249 upper_32_bits(tpc_kernel));
6250
6251 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6252 lower_32_bits(tpc_kernel));
6253 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6254 upper_32_bits(tpc_kernel));
6255 /* set a valid LUT pointer, content is of no significance */
6256 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6257 lower_32_bits(tpc_kernel));
6258 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6259 upper_32_bits(tpc_kernel));
6260
6261 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6262 lower_32_bits(CFG_BASE +
6263 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6264
6265 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6266 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6267 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6268 /* wait a bit for the engine to start executing */
6269 usleep_range(1000, 1500);
6270
6271 /* wait until engine has finished executing */
6272 rc = hl_poll_timeout(
6273 hdev,
6274 mmTPC0_CFG_STATUS + offset,
6275 status,
6276 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6277 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6278 1000,
6279 kernel_timeout);
6280
6281 if (rc) {
6282 dev_err(hdev->dev,
6283 "Timeout while waiting for TPC%d icache prefetch\n",
6284 tpc_id);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006285 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006286 mutex_unlock(&gaudi->clk_gate_mutex);
6287 return -EIO;
6288 }
6289
6290 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6291 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6292
6293 /* wait a bit for the engine to start executing */
6294 usleep_range(1000, 1500);
6295
6296 /* wait until engine has finished executing */
6297 rc = hl_poll_timeout(
6298 hdev,
6299 mmTPC0_CFG_STATUS + offset,
6300 status,
6301 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6302 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6303 1000,
6304 kernel_timeout);
6305
Oded Gabbay31ac1f12020-08-12 11:28:13 +03006306 if (rc) {
6307 dev_err(hdev->dev,
6308 "Timeout while waiting for TPC%d vector pipe\n",
6309 tpc_id);
6310 hdev->asic_funcs->set_clock_gating(hdev);
6311 mutex_unlock(&gaudi->clk_gate_mutex);
6312 return -EIO;
6313 }
6314
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006315 rc = hl_poll_timeout(
6316 hdev,
6317 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6318 status,
6319 (status == 0),
6320 1000,
6321 kernel_timeout);
6322
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006323 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006324 mutex_unlock(&gaudi->clk_gate_mutex);
6325
6326 if (rc) {
6327 dev_err(hdev->dev,
6328 "Timeout while waiting for TPC%d kernel to execute\n",
6329 tpc_id);
6330 return -EIO;
6331 }
6332
6333 return 0;
6334}
6335
6336static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6337{
6338 return RREG32(mmHW_STATE);
6339}
6340
/*
 * gaudi_ctx_init() - ASIC-specific context initialization
 * @ctx: the context being initialized (unused).
 *
 * There is nothing to set up here, so success is always reported.
 *
 * Return: 0.
 */
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	/* Nothing to initialize per context */
	return 0;
}
6345
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006346static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6347{
6348 return gaudi_cq_assignment[cq_idx];
6349}
6350
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006351static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6352{
6353 return sizeof(struct packet_msg_short) +
6354 sizeof(struct packet_msg_prot) * 2;
6355}
6356
6357static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6358{
6359 return sizeof(struct packet_msg_short) * 4 +
6360 sizeof(struct packet_fence) +
6361 sizeof(struct packet_msg_prot) * 2;
6362}
6363
6364static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6365{
6366 struct hl_cb *cb = (struct hl_cb *) data;
6367 struct packet_msg_short *pkt;
6368 u32 value, ctl;
6369
6370 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6371 memset(pkt, 0, sizeof(*pkt));
6372
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006373 /* Inc by 1, Mode ADD */
6374 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6375 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006376
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006377 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6378 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6379 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6380 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6381 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6382 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6383 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006384
6385 pkt->value = cpu_to_le32(value);
6386 pkt->ctl = cpu_to_le32(ctl);
6387}
6388
6389static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6390 u16 addr)
6391{
6392 u32 ctl, pkt_size = sizeof(*pkt);
6393
6394 memset(pkt, 0, pkt_size);
6395
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006396 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6397 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6398 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6399 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6400 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6401 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006402
6403 pkt->value = cpu_to_le32(value);
6404 pkt->ctl = cpu_to_le32(ctl);
6405
6406 return pkt_size;
6407}
6408
6409static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6410 u16 sob_val, u16 addr)
6411{
6412 u32 ctl, value, pkt_size = sizeof(*pkt);
6413 u8 mask = ~(1 << (sob_id & 0x7));
6414
6415 memset(pkt, 0, pkt_size);
6416
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006417 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6418 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6419 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6420 0); /* GREATER OR EQUAL*/
6421 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006422
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006423 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6424 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6425 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6426 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6427 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6428 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6429 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006430
6431 pkt->value = cpu_to_le32(value);
6432 pkt->ctl = cpu_to_le32(ctl);
6433
6434 return pkt_size;
6435}
6436
6437static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6438{
6439 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6440
6441 memset(pkt, 0, pkt_size);
6442
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006443 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6444 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6445 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006446
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006447 ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6448 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6449 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6450 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006451
6452 pkt->cfg = cpu_to_le32(cfg);
6453 pkt->ctl = cpu_to_le32(ctl);
6454
6455 return pkt_size;
6456}
6457
6458static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6459 u16 sob_val, u16 mon_id, u32 q_idx)
6460{
6461 struct hl_cb *cb = (struct hl_cb *) data;
6462 void *buf = (void *) (uintptr_t) cb->kernel_address;
6463 u64 monitor_base, fence_addr = 0;
6464 u32 size = 0;
6465 u16 msg_addr_offset;
6466
6467 switch (q_idx) {
6468 case GAUDI_QUEUE_ID_DMA_0_0:
6469 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6470 break;
6471 case GAUDI_QUEUE_ID_DMA_0_1:
6472 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6473 break;
6474 case GAUDI_QUEUE_ID_DMA_0_2:
6475 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6476 break;
6477 case GAUDI_QUEUE_ID_DMA_0_3:
6478 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6479 break;
6480 case GAUDI_QUEUE_ID_DMA_1_0:
6481 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6482 break;
6483 case GAUDI_QUEUE_ID_DMA_1_1:
6484 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6485 break;
6486 case GAUDI_QUEUE_ID_DMA_1_2:
6487 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6488 break;
6489 case GAUDI_QUEUE_ID_DMA_1_3:
6490 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6491 break;
6492 case GAUDI_QUEUE_ID_DMA_5_0:
6493 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6494 break;
6495 case GAUDI_QUEUE_ID_DMA_5_1:
6496 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6497 break;
6498 case GAUDI_QUEUE_ID_DMA_5_2:
6499 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6500 break;
6501 case GAUDI_QUEUE_ID_DMA_5_3:
6502 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6503 break;
6504 default:
6505 /* queue index should be valid here */
6506 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6507 q_idx);
6508 return;
6509 }
6510
6511 fence_addr += CFG_BASE;
6512
6513 /*
6514 * monitor_base should be the content of the base0 address registers,
6515 * so it will be added to the msg short offsets
6516 */
6517 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6518
6519 /* First monitor config packet: low address of the sync */
6520 msg_addr_offset =
6521 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6522 monitor_base;
6523
6524 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6525 msg_addr_offset);
6526
6527 /* Second monitor config packet: high address of the sync */
6528 msg_addr_offset =
6529 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6530 monitor_base;
6531
6532 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6533 msg_addr_offset);
6534
6535 /*
6536 * Third monitor config packet: the payload, i.e. what to write when the
6537 * sync triggers
6538 */
6539 msg_addr_offset =
6540 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6541 monitor_base;
6542
6543 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6544
6545 /* Fourth monitor config packet: bind the monitor to a sync object */
6546 msg_addr_offset =
6547 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6548 monitor_base;
6549 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6550 msg_addr_offset);
6551
6552 /* Fence packet */
6553 size += gaudi_add_fence_pkt(buf + size);
6554}
6555
6556static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6557{
6558 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6559
6560 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6561 hw_sob->sob_id);
6562
6563 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6564 0);
6565
6566 kref_init(&hw_sob->kref);
6567}
6568
6569static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6570{
6571 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6572 HL_POWER9_HOST_MAGIC) {
6573 hdev->power9_64bit_dma_enable = 1;
6574 hdev->dma_mask = 64;
6575 } else {
6576 hdev->power9_64bit_dma_enable = 0;
6577 hdev->dma_mask = 48;
6578 }
6579}
6580
6581static u64 gaudi_get_device_time(struct hl_device *hdev)
6582{
6583 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6584
6585 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6586}
6587
6588static const struct hl_asic_funcs gaudi_funcs = {
6589 .early_init = gaudi_early_init,
6590 .early_fini = gaudi_early_fini,
6591 .late_init = gaudi_late_init,
6592 .late_fini = gaudi_late_fini,
6593 .sw_init = gaudi_sw_init,
6594 .sw_fini = gaudi_sw_fini,
6595 .hw_init = gaudi_hw_init,
6596 .hw_fini = gaudi_hw_fini,
6597 .halt_engines = gaudi_halt_engines,
6598 .suspend = gaudi_suspend,
6599 .resume = gaudi_resume,
6600 .cb_mmap = gaudi_cb_mmap,
6601 .ring_doorbell = gaudi_ring_doorbell,
6602 .pqe_write = gaudi_pqe_write,
6603 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6604 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6605 .get_int_queue_base = gaudi_get_int_queue_base,
6606 .test_queues = gaudi_test_queues,
6607 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6608 .asic_dma_pool_free = gaudi_dma_pool_free,
6609 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6610 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6611 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6612 .cs_parser = gaudi_cs_parser,
6613 .asic_dma_map_sg = gaudi_dma_map_sg,
6614 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6615 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6616 .update_eq_ci = gaudi_update_eq_ci,
6617 .context_switch = gaudi_context_switch,
6618 .restore_phase_topology = gaudi_restore_phase_topology,
6619 .debugfs_read32 = gaudi_debugfs_read32,
6620 .debugfs_write32 = gaudi_debugfs_write32,
6621 .debugfs_read64 = gaudi_debugfs_read64,
6622 .debugfs_write64 = gaudi_debugfs_write64,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006623 .add_device_attr = gaudi_add_device_attr,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006624 .handle_eqe = gaudi_handle_eqe,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006625 .set_pll_profile = gaudi_set_pll_profile,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006626 .get_events_stat = gaudi_get_events_stat,
6627 .read_pte = gaudi_read_pte,
6628 .write_pte = gaudi_write_pte,
6629 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6630 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6631 .send_heartbeat = gaudi_send_heartbeat,
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006632 .set_clock_gating = gaudi_set_clock_gating,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006633 .disable_clock_gating = gaudi_disable_clock_gating,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03006634 .debug_coresight = gaudi_debug_coresight,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006635 .is_device_idle = gaudi_is_device_idle,
6636 .soft_reset_late_init = gaudi_soft_reset_late_init,
6637 .hw_queues_lock = gaudi_hw_queues_lock,
6638 .hw_queues_unlock = gaudi_hw_queues_unlock,
6639 .get_pci_id = gaudi_get_pci_id,
6640 .get_eeprom_data = gaudi_get_eeprom_data,
6641 .send_cpu_message = gaudi_send_cpu_message,
6642 .get_hw_state = gaudi_get_hw_state,
6643 .pci_bars_map = gaudi_pci_bars_map,
6644 .set_dram_bar_base = gaudi_set_hbm_bar_base,
6645 .init_iatu = gaudi_init_iatu,
6646 .rreg = hl_rreg,
6647 .wreg = hl_wreg,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03006648 .halt_coresight = gaudi_halt_coresight,
Ofir Bittona04b7cd2020-07-13 13:36:55 +03006649 .ctx_init = gaudi_ctx_init,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006650 .get_clk_rate = gaudi_get_clk_rate,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006651 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6652 .read_device_fw_version = gaudi_read_device_fw_version,
6653 .load_firmware_to_device = gaudi_load_firmware_to_device,
6654 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006655 .get_signal_cb_size = gaudi_get_signal_cb_size,
6656 .get_wait_cb_size = gaudi_get_wait_cb_size,
6657 .gen_signal_cb = gaudi_gen_signal_cb,
6658 .gen_wait_cb = gaudi_gen_wait_cb,
6659 .reset_sob = gaudi_reset_sob,
6660 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6661 .get_device_time = gaudi_get_device_time
6662};
6663
6664/**
6665 * gaudi_set_asic_funcs - set GAUDI function pointers
6666 *
Lee Jonesf7d227c2020-07-01 09:58:42 +01006667 * @hdev: pointer to hl_device structure
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006668 *
6669 */
6670void gaudi_set_asic_funcs(struct hl_device *hdev)
6671{
6672 hdev->asic_funcs = &gaudi_funcs;
6673}