blob: 45ba3a5f5b141da0f0eb223b4fea1e0155ecd5e4 [file] [log] [blame]
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03001// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "gaudiP.h"
Greg Kroah-Hartman7b16a152020-07-28 19:18:51 +02009#include "../include/hw_ip/mmu/mmu_general.h"
10#include "../include/hw_ip/mmu/mmu_v1_1.h"
11#include "../include/gaudi/gaudi_masks.h"
12#include "../include/gaudi/gaudi_fw_if.h"
13#include "../include/gaudi/gaudi_reg_map.h"
14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
Oded Gabbayac0ae6a2020-05-11 10:29:27 +030015
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/genalloc.h>
21#include <linux/io-64-nonatomic-lo-hi.h>
22#include <linux/iommu.h>
23#include <linux/seq_file.h>
Ofir Bitton6c07bab2020-06-01 10:38:46 +030024#include <linux/bitfield.h>
Oded Gabbayac0ae6a2020-05-11 10:29:27 +030025
26/*
27 * Gaudi security scheme:
28 *
29 * 1. Host is protected by:
30 * - Range registers
31 * - MMU
32 *
33 * 2. DDR is protected by:
34 * - Range registers (protect the first 512MB)
35 *
36 * 3. Configuration is protected by:
37 * - Range registers
38 * - Protection bits
39 *
40 * MMU is always enabled.
41 *
42 * QMAN DMA channels 0,1,5 (PCI DMAN):
43 * - DMA is not secured.
44 * - PQ and CQ are secured.
45 * - CP is secured: The driver needs to parse CB but WREG should be allowed
46 * because of TDMA (tensor DMA). Hence, WREG is always not
47 * secured.
48 *
49 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
50 * channel 0 to be secured, execute the DMA and change it back to not secured.
51 * Currently, the driver doesn't use the DMA while there are compute jobs
52 * running.
53 *
54 * The current use cases for the driver to use the DMA are:
55 * - Clear SRAM on context switch (happens on context switch when device is
56 * idle)
57 * - MMU page tables area clear (happens on init)
58 *
59 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
60 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
61 * CQ, CP and the engine are not secured
62 *
63 */
64
65#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
66#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
67#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
68
69#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
70
71#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
72#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
73#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
74#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
75
76#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
77#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +030078#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
79#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
80#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
Oded Gabbay788cacf2020-07-07 17:30:13 +030083#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +030084
85#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
86
87#define GAUDI_MAX_STRING_LEN 20
88
89#define GAUDI_CB_POOL_CB_CNT 512
90#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
91
92#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
93
94#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
95
96#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
97
98#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
99
Oded Gabbay647e8352020-06-07 11:26:48 +0300100#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300101
Oded Gabbaye38bfd32020-07-03 20:46:12 +0300102#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
103 BIT(GAUDI_ENGINE_ID_MME_0) |\
104 BIT(GAUDI_ENGINE_ID_MME_2) |\
105 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
106
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300107static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
108 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
109 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
110 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
111 "gaudi cpu eq"
112};
113
114static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
Oded Gabbaye38bfd32020-07-03 20:46:12 +0300115 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
116 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
117 [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
118 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
119 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
120 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
121 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
122 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300123};
124
125static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
126 [0] = GAUDI_QUEUE_ID_DMA_0_0,
127 [1] = GAUDI_QUEUE_ID_DMA_0_1,
128 [2] = GAUDI_QUEUE_ID_DMA_0_2,
129 [3] = GAUDI_QUEUE_ID_DMA_0_3,
130 [4] = GAUDI_QUEUE_ID_DMA_1_0,
131 [5] = GAUDI_QUEUE_ID_DMA_1_1,
132 [6] = GAUDI_QUEUE_ID_DMA_1_2,
133 [7] = GAUDI_QUEUE_ID_DMA_1_3,
134 [8] = GAUDI_QUEUE_ID_DMA_5_0,
135 [9] = GAUDI_QUEUE_ID_DMA_5_1,
136 [10] = GAUDI_QUEUE_ID_DMA_5_2,
137 [11] = GAUDI_QUEUE_ID_DMA_5_3
138};
139
140static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
141 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
142 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
143 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
144 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
145 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
146 [PACKET_REPEAT] = sizeof(struct packet_repeat),
147 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
148 [PACKET_FENCE] = sizeof(struct packet_fence),
149 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
150 [PACKET_NOP] = sizeof(struct packet_nop),
151 [PACKET_STOP] = sizeof(struct packet_stop),
152 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
153 [PACKET_WAIT] = sizeof(struct packet_wait),
154 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
155};
156
Ofir Bittonbc75be22020-07-30 14:56:38 +0300157static inline bool validate_packet_id(enum packet_id id)
158{
159 switch (id) {
160 case PACKET_WREG_32:
161 case PACKET_WREG_BULK:
162 case PACKET_MSG_LONG:
163 case PACKET_MSG_SHORT:
164 case PACKET_CP_DMA:
165 case PACKET_REPEAT:
166 case PACKET_MSG_PROT:
167 case PACKET_FENCE:
168 case PACKET_LIN_DMA:
169 case PACKET_NOP:
170 case PACKET_STOP:
171 case PACKET_ARB_POINT:
172 case PACKET_WAIT:
173 case PACKET_LOAD_AND_EXE:
174 return true;
175 default:
176 return false;
177 }
178}
179
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300180static const char * const
181gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
182 "tpc_address_exceed_slm",
183 "tpc_div_by_0",
184 "tpc_spu_mac_overflow",
185 "tpc_spu_addsub_overflow",
186 "tpc_spu_abs_overflow",
187 "tpc_spu_fp_dst_nan_inf",
188 "tpc_spu_fp_dst_denorm",
189 "tpc_vpu_mac_overflow",
190 "tpc_vpu_addsub_overflow",
191 "tpc_vpu_abs_overflow",
192 "tpc_vpu_fp_dst_nan_inf",
193 "tpc_vpu_fp_dst_denorm",
194 "tpc_assertions",
195 "tpc_illegal_instruction",
196 "tpc_pc_wrap_around",
197 "tpc_qm_sw_err",
198 "tpc_hbw_rresp_err",
199 "tpc_hbw_bresp_err",
200 "tpc_lbw_rresp_err",
201 "tpc_lbw_bresp_err"
202};
203
204static const char * const
205gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
206 "PQ AXI HBW error",
207 "CQ AXI HBW error",
208 "CP AXI HBW error",
209 "CP error due to undefined OPCODE",
210 "CP encountered STOP OPCODE",
211 "CP AXI LBW error",
212 "CP WRREG32 or WRBULK returned error",
213 "N/A",
214 "FENCE 0 inc over max value and clipped",
215 "FENCE 1 inc over max value and clipped",
216 "FENCE 2 inc over max value and clipped",
217 "FENCE 3 inc over max value and clipped",
218 "FENCE 0 dec under min value and clipped",
219 "FENCE 1 dec under min value and clipped",
220 "FENCE 2 dec under min value and clipped",
221 "FENCE 3 dec under min value and clipped"
222};
223
224static const char * const
225gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
226 "Choice push while full error",
227 "Choice Q watchdog error",
228 "MSG AXI LBW returned with error"
229};
230
231static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
234 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
240 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
256 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
316 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
317 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
318 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
319 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
320 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
321 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
322 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
323 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
324 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
325 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
326 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
327 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
328 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
329 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
330 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
331 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
332 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
333 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
334 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
335 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
336 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
337 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
338 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
339 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
340 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
341 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
342 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
343 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
344 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
345};
346
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +0300347struct ecc_info_extract_params {
348 u64 block_address;
349 u32 num_memories;
350 bool derr;
351 bool disable_clock_gating;
352};
353
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300354static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
355 u64 phys_addr);
356static int gaudi_send_job_on_qman0(struct hl_device *hdev,
357 struct hl_cs_job *job);
358static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
359 u32 size, u64 val);
360static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
361 u32 tpc_id);
362static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
363static int gaudi_armcp_info_get(struct hl_device *hdev);
364static void gaudi_disable_clock_gating(struct hl_device *hdev);
365static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
366
367static int gaudi_get_fixed_properties(struct hl_device *hdev)
368{
369 struct asic_fixed_properties *prop = &hdev->asic_prop;
Ofir Bitton843839b2020-07-19 11:08:09 +0300370 u32 num_sync_stream_queues = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300371 int i;
372
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300373 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
374 prop->hw_queues_props = kcalloc(prop->max_queues,
375 sizeof(struct hw_queue_properties),
376 GFP_KERNEL);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300377
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300378 if (!prop->hw_queues_props)
379 return -ENOMEM;
380
381 for (i = 0 ; i < prop->max_queues ; i++) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300382 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
383 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
384 prop->hw_queues_props[i].driver_only = 0;
385 prop->hw_queues_props[i].requires_kernel_cb = 1;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300386 prop->hw_queues_props[i].supports_sync_stream = 1;
Ofir Bitton843839b2020-07-19 11:08:09 +0300387 num_sync_stream_queues++;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300388 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
389 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
390 prop->hw_queues_props[i].driver_only = 1;
391 prop->hw_queues_props[i].requires_kernel_cb = 0;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300392 prop->hw_queues_props[i].supports_sync_stream = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300393 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
394 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
395 prop->hw_queues_props[i].driver_only = 0;
396 prop->hw_queues_props[i].requires_kernel_cb = 0;
397 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
398 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
399 prop->hw_queues_props[i].driver_only = 0;
400 prop->hw_queues_props[i].requires_kernel_cb = 0;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300401 prop->hw_queues_props[i].supports_sync_stream = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300402 }
403 }
404
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300405 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
Ofir Bitton21e7a342020-05-14 18:25:47 +0300406 prop->sync_stream_first_sob = 0;
407 prop->sync_stream_first_mon = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300408 prop->dram_base_address = DRAM_PHYS_BASE;
409 prop->dram_size = GAUDI_HBM_SIZE_32GB;
410 prop->dram_end_address = prop->dram_base_address +
411 prop->dram_size;
412 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
413
414 prop->sram_base_address = SRAM_BASE_ADDR;
415 prop->sram_size = SRAM_SIZE;
416 prop->sram_end_address = prop->sram_base_address +
417 prop->sram_size;
418 prop->sram_user_base_address = prop->sram_base_address +
419 SRAM_USER_BASE_OFFSET;
420
421 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
422 if (hdev->pldm)
423 prop->mmu_pgt_size = 0x800000; /* 8MB */
424 else
425 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
426 prop->mmu_pte_size = HL_PTE_SIZE;
427 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
428 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
429 prop->dram_page_size = PAGE_SIZE_2MB;
430
431 prop->pmmu.hop0_shift = HOP0_SHIFT;
432 prop->pmmu.hop1_shift = HOP1_SHIFT;
433 prop->pmmu.hop2_shift = HOP2_SHIFT;
434 prop->pmmu.hop3_shift = HOP3_SHIFT;
435 prop->pmmu.hop4_shift = HOP4_SHIFT;
436 prop->pmmu.hop0_mask = HOP0_MASK;
437 prop->pmmu.hop1_mask = HOP1_MASK;
438 prop->pmmu.hop2_mask = HOP2_MASK;
439 prop->pmmu.hop3_mask = HOP3_MASK;
440 prop->pmmu.hop4_mask = HOP4_MASK;
441 prop->pmmu.start_addr = VA_HOST_SPACE_START;
442 prop->pmmu.end_addr =
443 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
444 prop->pmmu.page_size = PAGE_SIZE_4KB;
445
446 /* PMMU and HPMMU are the same except of page size */
447 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
448 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
449
450 /* shifts and masks are the same in PMMU and DMMU */
451 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
452 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
453 prop->dmmu.end_addr = VA_HOST_SPACE_END;
454 prop->dmmu.page_size = PAGE_SIZE_2MB;
455
456 prop->cfg_size = CFG_SIZE;
457 prop->max_asid = MAX_ASID;
458 prop->num_of_events = GAUDI_EVENT_SIZE;
459 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
460
Oded Gabbay58361aa2020-08-08 23:34:47 +0300461 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300462
463 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
464 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
465
466 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
467 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
468
469 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
470 CARD_NAME_MAX_LEN);
471
Ofir Bittonc16d45f2020-06-02 12:28:27 +0300472 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
473
Ofir Bitton843839b2020-07-19 11:08:09 +0300474 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
475 num_sync_stream_queues * HL_RSVD_SOBS;
476 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
477 num_sync_stream_queues * HL_RSVD_MONS;
478
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300479 return 0;
480}
481
482static int gaudi_pci_bars_map(struct hl_device *hdev)
483{
484 static const char * const name[] = {"SRAM", "CFG", "HBM"};
485 bool is_wc[3] = {false, false, true};
486 int rc;
487
488 rc = hl_pci_bars_map(hdev, name, is_wc);
489 if (rc)
490 return rc;
491
492 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
493 (CFG_BASE - SPI_FLASH_BASE_ADDR);
494
495 return 0;
496}
497
498static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
499{
500 struct gaudi_device *gaudi = hdev->asic_specific;
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300501 struct hl_inbound_pci_region pci_region;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300502 u64 old_addr = addr;
503 int rc;
504
505 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
506 return old_addr;
507
508 /* Inbound Region 2 - Bar 4 - Point to HBM */
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300509 pci_region.mode = PCI_BAR_MATCH_MODE;
510 pci_region.bar = HBM_BAR_ID;
511 pci_region.addr = addr;
512 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300513 if (rc)
514 return U64_MAX;
515
516 if (gaudi) {
517 old_addr = gaudi->hbm_bar_cur_addr;
518 gaudi->hbm_bar_cur_addr = addr;
519 }
520
521 return old_addr;
522}
523
524static int gaudi_init_iatu(struct hl_device *hdev)
525{
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300526 struct hl_inbound_pci_region inbound_region;
527 struct hl_outbound_pci_region outbound_region;
528 int rc;
529
530 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
531 inbound_region.mode = PCI_BAR_MATCH_MODE;
532 inbound_region.bar = SRAM_BAR_ID;
533 inbound_region.addr = SRAM_BASE_ADDR;
534 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
535 if (rc)
536 goto done;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300537
538 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300539 inbound_region.mode = PCI_BAR_MATCH_MODE;
540 inbound_region.bar = CFG_BAR_ID;
541 inbound_region.addr = SPI_FLASH_BASE_ADDR;
542 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300543 if (rc)
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300544 goto done;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300545
Ofir Bittonf4cbfd22020-06-15 17:45:12 +0300546 /* Inbound Region 2 - Bar 4 - Point to HBM */
547 inbound_region.mode = PCI_BAR_MATCH_MODE;
548 inbound_region.bar = HBM_BAR_ID;
549 inbound_region.addr = DRAM_PHYS_BASE;
550 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
551 if (rc)
552 goto done;
553
554 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
555
556 /* Outbound Region 0 - Point to Host */
557 outbound_region.addr = HOST_PHYS_BASE;
558 outbound_region.size = HOST_PHYS_SIZE;
559 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
560
561done:
562 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300563}
564
565static int gaudi_early_init(struct hl_device *hdev)
566{
567 struct asic_fixed_properties *prop = &hdev->asic_prop;
568 struct pci_dev *pdev = hdev->pdev;
569 int rc;
570
571 rc = gaudi_get_fixed_properties(hdev);
572 if (rc) {
573 dev_err(hdev->dev, "Failed to get fixed properties\n");
574 return rc;
575 }
576
577 /* Check BAR sizes */
578 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
579 dev_err(hdev->dev,
580 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
581 SRAM_BAR_ID,
582 (unsigned long long) pci_resource_len(pdev,
583 SRAM_BAR_ID),
584 SRAM_BAR_SIZE);
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300585 rc = -ENODEV;
586 goto free_queue_props;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300587 }
588
589 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
590 dev_err(hdev->dev,
591 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
592 CFG_BAR_ID,
593 (unsigned long long) pci_resource_len(pdev,
594 CFG_BAR_ID),
595 CFG_BAR_SIZE);
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300596 rc = -ENODEV;
597 goto free_queue_props;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300598 }
599
600 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
601
602 rc = hl_pci_init(hdev);
603 if (rc)
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300604 goto free_queue_props;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300605
606 return 0;
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300607
608free_queue_props:
609 kfree(hdev->asic_prop.hw_queues_props);
610 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300611}
612
613static int gaudi_early_fini(struct hl_device *hdev)
614{
Ofir Bitton3abc99b2020-06-23 14:50:39 +0300615 kfree(hdev->asic_prop.hw_queues_props);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300616 hl_pci_fini(hdev);
617
618 return 0;
619}
620
621/**
622 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
623 *
624 * @hdev: pointer to hl_device structure
625 *
626 */
627static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
628{
629 struct asic_fixed_properties *prop = &hdev->asic_prop;
Adam Aharone8edded2020-05-26 11:04:30 +0300630 u32 trace_freq = 0;
631 u32 pll_clk = 0;
632 u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
633 u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
634 u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
635 u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
636 u32 od = RREG32(mmPSOC_CPU_PLL_OD);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300637
Adam Aharone8edded2020-05-26 11:04:30 +0300638 if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
639 if (div_sel == DIV_SEL_REF_CLK)
640 trace_freq = PLL_REF_CLK;
641 else
642 trace_freq = PLL_REF_CLK / (div_fctr + 1);
643 } else if (div_sel == DIV_SEL_PLL_CLK ||
644 div_sel == DIV_SEL_DIVIDED_PLL) {
645 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
646 if (div_sel == DIV_SEL_PLL_CLK)
647 trace_freq = pll_clk;
648 else
649 trace_freq = pll_clk / (div_fctr + 1);
650 } else {
651 dev_warn(hdev->dev,
652 "Received invalid div select value: %d", div_sel);
653 }
654
655 prop->psoc_timestamp_frequency = trace_freq;
656 prop->psoc_pci_pll_nr = nr;
657 prop->psoc_pci_pll_nf = nf;
658 prop->psoc_pci_pll_od = od;
659 prop->psoc_pci_pll_div_factor = div_fctr;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300660}
661
662static int _gaudi_init_tpc_mem(struct hl_device *hdev,
663 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
664{
665 struct asic_fixed_properties *prop = &hdev->asic_prop;
666 struct packet_lin_dma *init_tpc_mem_pkt;
667 struct hl_cs_job *job;
668 struct hl_cb *cb;
669 u64 dst_addr;
670 u32 cb_size, ctl;
671 u8 tpc_id;
672 int rc;
673
Ofir Bittona04b7cd2020-07-13 13:36:55 +0300674 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300675 if (!cb)
676 return -EFAULT;
677
678 init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
679 cb->kernel_address;
680 cb_size = sizeof(*init_tpc_mem_pkt);
681 memset(init_tpc_mem_pkt, 0, cb_size);
682
683 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
684
685 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
686 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
687 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
688 (1 << GAUDI_PKT_CTL_MB_SHIFT));
689
690 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
691
692 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
693 dst_addr = (prop->sram_user_base_address &
694 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
695 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
696 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
697
698 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
699 if (!job) {
700 dev_err(hdev->dev, "Failed to allocate a new job\n");
701 rc = -ENOMEM;
702 goto release_cb;
703 }
704
705 job->id = 0;
706 job->user_cb = cb;
707 job->user_cb->cs_cnt++;
708 job->user_cb_size = cb_size;
709 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
710 job->patched_cb = job->user_cb;
711 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
712
713 hl_debugfs_add_job(hdev, job);
714
715 rc = gaudi_send_job_on_qman0(hdev, job);
716
717 if (rc)
718 goto free_job;
719
720 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
721 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
722 if (rc)
723 break;
724 }
725
726free_job:
727 hl_userptr_delete_list(hdev, &job->userptr_list);
728 hl_debugfs_remove_job(hdev, job);
729 kfree(job);
730 cb->cs_cnt--;
731
732release_cb:
733 hl_cb_put(cb);
734 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
735
736 return rc;
737}
738
739/*
740 * gaudi_init_tpc_mem() - Initialize TPC memories.
741 * @hdev: Pointer to hl_device structure.
742 *
743 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
744 *
745 * Return: 0 for success, negative value for error.
746 */
747static int gaudi_init_tpc_mem(struct hl_device *hdev)
748{
749 const struct firmware *fw;
750 size_t fw_size;
751 void *cpu_addr;
752 dma_addr_t dma_handle;
753 int rc;
754
755 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
756 if (rc) {
757 dev_err(hdev->dev, "Firmware file %s is not found!\n",
758 GAUDI_TPC_FW_FILE);
759 goto out;
760 }
761
762 fw_size = fw->size;
763 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
764 &dma_handle, GFP_KERNEL | __GFP_ZERO);
765 if (!cpu_addr) {
766 dev_err(hdev->dev,
767 "Failed to allocate %zu of dma memory for TPC kernel\n",
768 fw_size);
769 rc = -ENOMEM;
770 goto out;
771 }
772
773 memcpy(cpu_addr, fw->data, fw_size);
774
775 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
776
777 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
778 dma_handle);
779
780out:
781 release_firmware(fw);
782 return rc;
783}
784
785static int gaudi_late_init(struct hl_device *hdev)
786{
787 struct gaudi_device *gaudi = hdev->asic_specific;
788 int rc;
789
790 rc = gaudi->armcp_info_get(hdev);
791 if (rc) {
792 dev_err(hdev->dev, "Failed to get armcp info\n");
793 return rc;
794 }
795
796 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
797 if (rc) {
798 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
799 return rc;
800 }
801
802 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
803
804 gaudi_fetch_psoc_frequency(hdev);
805
806 rc = gaudi_mmu_clear_pgt_range(hdev);
807 if (rc) {
808 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
809 goto disable_pci_access;
810 }
811
812 rc = gaudi_init_tpc_mem(hdev);
813 if (rc) {
814 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
815 goto disable_pci_access;
816 }
817
818 return 0;
819
820disable_pci_access:
821 hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
822
823 return rc;
824}
825
826static void gaudi_late_fini(struct hl_device *hdev)
827{
828 const struct hwmon_channel_info **channel_info_arr;
829 int i = 0;
830
831 if (!hdev->hl_chip_info->info)
832 return;
833
834 channel_info_arr = hdev->hl_chip_info->info;
835
836 while (channel_info_arr[i]) {
837 kfree(channel_info_arr[i]->config);
838 kfree(channel_info_arr[i]);
839 i++;
840 }
841
842 kfree(channel_info_arr);
843
844 hdev->hl_chip_info->info = NULL;
845}
846
847static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
848{
849 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
850 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
851 int i, j, rc = 0;
852
853 /*
854 * The device CPU works with 40-bits addresses, while bit 39 must be set
855 * to '1' when accessing the host.
856 * Bits 49:39 of the full host address are saved for a later
857 * configuration of the HW to perform extension to 50 bits.
858 * Because there is a single HW register that holds the extension bits,
859 * these bits must be identical in all allocated range.
860 */
861
862 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
863 virt_addr_arr[i] =
864 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
865 HL_CPU_ACCESSIBLE_MEM_SIZE,
866 &dma_addr_arr[i],
867 GFP_KERNEL | __GFP_ZERO);
868 if (!virt_addr_arr[i]) {
869 rc = -ENOMEM;
870 goto free_dma_mem_arr;
871 }
872
873 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
874 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
875 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
876 break;
877 }
878
879 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
880 dev_err(hdev->dev,
881 "MSB of CPU accessible DMA memory are not identical in all range\n");
882 rc = -EFAULT;
883 goto free_dma_mem_arr;
884 }
885
886 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
887 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
888 hdev->cpu_pci_msb_addr =
889 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
890
891 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
892
893free_dma_mem_arr:
894 for (j = 0 ; j < i ; j++)
895 hdev->asic_funcs->asic_dma_free_coherent(hdev,
896 HL_CPU_ACCESSIBLE_MEM_SIZE,
897 virt_addr_arr[j],
898 dma_addr_arr[j]);
899
900 return rc;
901}
902
903static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
904{
905 struct gaudi_device *gaudi = hdev->asic_specific;
906 struct gaudi_internal_qman_info *q;
907 u32 i;
908
909 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
910 q = &gaudi->internal_qmans[i];
911 if (!q->pq_kernel_addr)
912 continue;
913 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
914 q->pq_kernel_addr,
915 q->pq_dma_addr);
916 }
917}
918
919static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
920{
921 struct gaudi_device *gaudi = hdev->asic_specific;
922 struct gaudi_internal_qman_info *q;
923 int rc, i;
924
925 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
926 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
927 continue;
928
929 q = &gaudi->internal_qmans[i];
930
931 switch (i) {
932 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
933 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
934 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
935 break;
936 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
937 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
938 break;
939 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
940 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
941 break;
942 default:
943 dev_err(hdev->dev, "Bad internal queue index %d", i);
944 rc = -EINVAL;
945 goto free_internal_qmans_pq_mem;
946 }
947
948 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
949 hdev, q->pq_size,
950 &q->pq_dma_addr,
951 GFP_KERNEL | __GFP_ZERO);
952 if (!q->pq_kernel_addr) {
953 rc = -ENOMEM;
954 goto free_internal_qmans_pq_mem;
955 }
956 }
957
958 return 0;
959
960free_internal_qmans_pq_mem:
961 gaudi_free_internal_qmans_pq_mem(hdev);
962 return rc;
963}
964
965static int gaudi_sw_init(struct hl_device *hdev)
966{
967 struct gaudi_device *gaudi;
Ofir Bittonebd8d122020-05-10 13:41:28 +0300968 u32 i, event_id = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300969 int rc;
970
971 /* Allocate device structure */
972 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
973 if (!gaudi)
974 return -ENOMEM;
975
Ofir Bittonebd8d122020-05-10 13:41:28 +0300976 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
977 if (gaudi_irq_map_table[i].valid) {
978 if (event_id == GAUDI_EVENT_SIZE) {
979 dev_err(hdev->dev,
980 "Event array exceeds the limit of %u events\n",
981 GAUDI_EVENT_SIZE);
982 rc = -EINVAL;
983 goto free_gaudi_device;
984 }
985
986 gaudi->events[event_id++] =
987 gaudi_irq_map_table[i].fc_id;
988 }
989 }
990
Oded Gabbayac0ae6a2020-05-11 10:29:27 +0300991 gaudi->armcp_info_get = gaudi_armcp_info_get;
992
993 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
994
995 hdev->asic_specific = gaudi;
996
997 /* Create DMA pool for small allocations */
998 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
999 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1000 if (!hdev->dma_pool) {
1001 dev_err(hdev->dev, "failed to create DMA pool\n");
1002 rc = -ENOMEM;
1003 goto free_gaudi_device;
1004 }
1005
1006 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1007 if (rc)
1008 goto free_dma_pool;
1009
1010 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1011 if (!hdev->cpu_accessible_dma_pool) {
1012 dev_err(hdev->dev,
1013 "Failed to create CPU accessible DMA pool\n");
1014 rc = -ENOMEM;
1015 goto free_cpu_dma_mem;
1016 }
1017
1018 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1019 (uintptr_t) hdev->cpu_accessible_dma_mem,
1020 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1021 if (rc) {
1022 dev_err(hdev->dev,
1023 "Failed to add memory to CPU accessible DMA pool\n");
1024 rc = -EFAULT;
1025 goto free_cpu_accessible_dma_pool;
1026 }
1027
1028 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1029 if (rc)
1030 goto free_cpu_accessible_dma_pool;
1031
1032 spin_lock_init(&gaudi->hw_queues_lock);
1033 mutex_init(&gaudi->clk_gate_mutex);
1034
1035 hdev->supports_sync_stream = true;
1036 hdev->supports_coresight = true;
1037
1038 return 0;
1039
1040free_cpu_accessible_dma_pool:
1041 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1042free_cpu_dma_mem:
1043 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1044 hdev->cpu_pci_msb_addr);
1045 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1046 HL_CPU_ACCESSIBLE_MEM_SIZE,
1047 hdev->cpu_accessible_dma_mem,
1048 hdev->cpu_accessible_dma_address);
1049free_dma_pool:
1050 dma_pool_destroy(hdev->dma_pool);
1051free_gaudi_device:
1052 kfree(gaudi);
1053 return rc;
1054}
1055
1056static int gaudi_sw_fini(struct hl_device *hdev)
1057{
1058 struct gaudi_device *gaudi = hdev->asic_specific;
1059
1060 gaudi_free_internal_qmans_pq_mem(hdev);
1061
1062 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1063
1064 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1065 hdev->cpu_pci_msb_addr);
1066 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1067 HL_CPU_ACCESSIBLE_MEM_SIZE,
1068 hdev->cpu_accessible_dma_mem,
1069 hdev->cpu_accessible_dma_address);
1070
1071 dma_pool_destroy(hdev->dma_pool);
1072
1073 mutex_destroy(&gaudi->clk_gate_mutex);
1074
1075 kfree(gaudi);
1076
1077 return 0;
1078}
1079
1080static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1081{
1082 struct hl_device *hdev = arg;
1083 int i;
1084
1085 if (hdev->disabled)
1086 return IRQ_HANDLED;
1087
1088 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1089 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1090
1091 hl_irq_handler_eq(irq, &hdev->event_queue);
1092
1093 return IRQ_HANDLED;
1094}
1095
1096/*
1097 * For backward compatibility, new MSI interrupts should be set after the
1098 * existing CPU and NIC interrupts.
1099 */
1100static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1101 bool cpu_eq)
1102{
1103 int msi_vec;
1104
1105 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1106 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1107 GAUDI_EVENT_QUEUE_MSI_IDX);
1108
1109 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1110 (nr + NIC_NUMBER_OF_ENGINES + 1);
1111
1112 return pci_irq_vector(hdev->pdev, msi_vec);
1113}
1114
1115static int gaudi_enable_msi_single(struct hl_device *hdev)
1116{
1117 int rc, irq;
1118
1119 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1120
1121 irq = gaudi_pci_irq_vector(hdev, 0, false);
1122 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1123 "gaudi single msi", hdev);
1124 if (rc)
1125 dev_err(hdev->dev,
1126 "Failed to request single MSI IRQ\n");
1127
1128 return rc;
1129}
1130
1131static int gaudi_enable_msi_multi(struct hl_device *hdev)
1132{
1133 int cq_cnt = hdev->asic_prop.completion_queues_count;
1134 int rc, i, irq_cnt_init, irq;
1135
1136 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1137 irq = gaudi_pci_irq_vector(hdev, i, false);
1138 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1139 &hdev->completion_queue[i]);
1140 if (rc) {
1141 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1142 goto free_irqs;
1143 }
1144 }
1145
1146 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1147 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1148 &hdev->event_queue);
1149 if (rc) {
1150 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1151 goto free_irqs;
1152 }
1153
1154 return 0;
1155
1156free_irqs:
1157 for (i = 0 ; i < irq_cnt_init ; i++)
1158 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1159 &hdev->completion_queue[i]);
1160 return rc;
1161}
1162
1163static int gaudi_enable_msi(struct hl_device *hdev)
1164{
1165 struct gaudi_device *gaudi = hdev->asic_specific;
1166 int rc;
1167
1168 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1169 return 0;
1170
1171 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1172 PCI_IRQ_MSI);
1173 if (rc < 0) {
1174 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1175 return rc;
1176 }
1177
1178 if (rc < NUMBER_OF_INTERRUPTS) {
1179 gaudi->multi_msi_mode = false;
1180 rc = gaudi_enable_msi_single(hdev);
1181 } else {
1182 gaudi->multi_msi_mode = true;
1183 rc = gaudi_enable_msi_multi(hdev);
1184 }
1185
1186 if (rc)
1187 goto free_pci_irq_vectors;
1188
1189 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1190
1191 return 0;
1192
1193free_pci_irq_vectors:
1194 pci_free_irq_vectors(hdev->pdev);
1195 return rc;
1196}
1197
1198static void gaudi_sync_irqs(struct hl_device *hdev)
1199{
1200 struct gaudi_device *gaudi = hdev->asic_specific;
1201 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1202
1203 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1204 return;
1205
1206 /* Wait for all pending IRQs to be finished */
1207 if (gaudi->multi_msi_mode) {
1208 for (i = 0 ; i < cq_cnt ; i++)
1209 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1210
1211 synchronize_irq(gaudi_pci_irq_vector(hdev,
1212 GAUDI_EVENT_QUEUE_MSI_IDX,
1213 true));
1214 } else {
1215 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1216 }
1217}
1218
1219static void gaudi_disable_msi(struct hl_device *hdev)
1220{
1221 struct gaudi_device *gaudi = hdev->asic_specific;
1222 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1223
1224 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1225 return;
1226
1227 gaudi_sync_irqs(hdev);
1228
1229 if (gaudi->multi_msi_mode) {
1230 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1231 true);
1232 free_irq(irq, &hdev->event_queue);
1233
1234 for (i = 0 ; i < cq_cnt ; i++) {
1235 irq = gaudi_pci_irq_vector(hdev, i, false);
1236 free_irq(irq, &hdev->completion_queue[i]);
1237 }
1238 } else {
1239 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1240 }
1241
1242 pci_free_irq_vectors(hdev->pdev);
1243
1244 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1245}
1246
1247static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1248{
1249 struct gaudi_device *gaudi = hdev->asic_specific;
1250
1251 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1252 return;
1253
1254 if (!hdev->sram_scrambler_enable)
1255 return;
1256
1257 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1258 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1259 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1260 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1261 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1262 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1263 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1264 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1265 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1266 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1267 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1268 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1270 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1272 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273
1274 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1275 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1276 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1277 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1278 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1279 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1280 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1281 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1282 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1283 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1284 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1285 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1287 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1289 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290
1291 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1292 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1293 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1294 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1295 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1296 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1297 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1298 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1299 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1300 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1301 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1302 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1304 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1306 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307
1308 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1309}
1310
1311static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1312{
1313 struct gaudi_device *gaudi = hdev->asic_specific;
1314
1315 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1316 return;
1317
1318 if (!hdev->dram_scrambler_enable)
1319 return;
1320
1321 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1322 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1323 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1324 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1325 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1326 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1327 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1328 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1329 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1330 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1331 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1332 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1334 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1336 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337
1338 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1339 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1340 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1341 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1342 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1343 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1344 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1345 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1346 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1347 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1348 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1349 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1351 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1353 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354
1355 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1356 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1357 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1358 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1359 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1360 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1361 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1362 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1363 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1364 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1365 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1366 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1368 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1370 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371
1372 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1373}
1374
1375static void gaudi_init_e2e(struct hl_device *hdev)
1376{
1377 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1378 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1379 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1380 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1381
1382 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1383 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1384 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1385 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1386
1387 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1388 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1389 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1390 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1391
1392 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1393 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1394 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1395 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1396
1397 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1398 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1399 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1400 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1401
1402 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1403 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1404 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1405 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1406
1407 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1408 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1409 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1410 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1411
1412 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1413 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1414 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1415 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1416
1417 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1418 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1419 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1420 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1421
1422 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1423 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1424 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1425 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1426
1427 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1428 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1429 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1430 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1431
1432 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1433 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1434 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1435 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1436
1437 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1438 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1439 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1440 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1441
1442 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1443 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1444 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1445 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1446
1447 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1448 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1449 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1450 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1451
1452 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1453 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1454 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1455 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1456
1457 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1458 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1459 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1460 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1461
1462 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1463 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1464 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1465 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1466
1467 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1468 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1469 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1470 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1471
1472 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1473 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1474 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1475 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1476
1477 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1478 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1479 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1480 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1481
1482 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1483 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1484 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1485 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1486
1487 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1488 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1489 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1490 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1491
1492 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1493 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1494 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1495 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1496
1497 if (!hdev->dram_scrambler_enable) {
1498 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1499 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1500 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1501 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1502
1503 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1504 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1505 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1506 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1507
1508 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1509 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1510 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1511 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1512
1513 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1514 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1515 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1516 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1517
1518 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1519 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1520 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1521 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1522
1523 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1524 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1525 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1526 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1527
1528 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1529 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1530 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1531 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1532
1533 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1534 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1535 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1536 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1537
1538 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1539 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1540 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1541 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1542
1543 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1544 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1545 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1546 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1547
1548 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1549 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1550 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1551 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1552
1553 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1554 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1555 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1556 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1557
1558 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1559 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1560 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1561 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1562
1563 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1564 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1565 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1566 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1567
1568 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1569 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1570 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1571 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1572
1573 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1574 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1575 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1576 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1577
1578 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1579 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1580 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1581 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1582
1583 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1584 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1585 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1586 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1587
1588 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1589 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1590 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1591 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1592
1593 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1594 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1595 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1596 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1597
1598 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1599 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1600 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1601 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1602
1603 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1604 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1605 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1606 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1607
1608 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1609 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1610 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1611 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1612
1613 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1614 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1615 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1616 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1617 }
1618
1619 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1620 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1621 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1622 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1623
1624 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1625 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1626 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1627 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1628
1629 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1630 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1631 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1632 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1633
1634 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1635 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1636 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1637 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1638
1639 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1640 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1641 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1642 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1643
1644 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1645 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1646 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1647 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1648
1649 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1650 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1651 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1652 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1653
1654 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1655 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1656 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1657 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1658
1659 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1660 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1661 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1662 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1663
1664 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1665 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1666 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1667 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1668
1669 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1670 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1671 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1672 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1673
1674 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1675 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1676 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1677 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1678
1679 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1680 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1681 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1682 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1683
1684 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1685 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1686 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1687 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1688
1689 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1690 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1691 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1692 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1693
1694 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1695 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1696 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1697 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1698
1699 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1700 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1701 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1702 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1703
1704 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1705 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1706 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1707 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1708
1709 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1710 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1711 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1712 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1713
1714 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1715 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1716 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1717 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1718
1719 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1720 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1721 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1722 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1723
1724 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1725 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1726 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1727 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1728
1729 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1730 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1731 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1732 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1733
1734 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1735 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1736 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1737 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1738}
1739
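/*
 * Program the HBM read/write credit counters for each DMA interface
 * (E_N, E_S, W_N, W_S) and enable read/write credit accounting on both
 * HBM channels of every interface.
 */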
1740static void gaudi_init_hbm_cred(struct hl_device *hdev)
1741{
1742 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1743
1744 hbm0_wr = 0x33333333;
1745 hbm0_rd = 0x77777777;
1746 hbm1_wr = 0x55555555;
1747 hbm1_rd = 0xDDDDDDDD;
1748
1749 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1750 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1751 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1752 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1753
1754 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1755 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1756 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1757 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1758
1759 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1760 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1761 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1762 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1763
1764 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1765 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1766 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1767 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1768
1769 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1770 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1771 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1772 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1773 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1774 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1775 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1776 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1777 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1778 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1779 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1780 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1781
1782 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1783 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1786 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1787 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1788 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1789 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1790 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1791 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1792 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1793 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1794}
1795
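/*
 * Apply the init-time ("golden") register values: E2E and HBM credits,
 * TPC interrupt masking and I-cache fetch configuration, zeroing of the
 * first SRAM bytes used by Tensor DMA, and the MME rollup counters.
 * Clock gating is disabled through the ASIC callback before the engines
 * are touched.
 */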
1796static void gaudi_init_golden_registers(struct hl_device *hdev)
1797{
1798 u32 tpc_offset;
1799 int tpc_id, i;
1800
1801 gaudi_init_e2e(hdev);
1802
1803 gaudi_init_hbm_cred(hdev);
1804
1805 hdev->asic_funcs->disable_clock_gating(hdev);
1806
1807 for (tpc_id = 0, tpc_offset = 0;
1808 tpc_id < TPC_NUMBER_OF_ENGINES;
1809 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1810 /* Mask all arithmetic interrupts from TPC */
1811 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1812 /* Set 16 cache lines */
1813 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1814 ICACHE_FETCH_LINE_NUM, 2);
1815 }
1816
1817 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1818 for (i = 0 ; i < 128 ; i += 8)
1819 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1820
1821 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1822 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1823 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1824 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1825}
1826
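/*
 * Configure a single stream (qman_id) of a PCI DMA QMAN: PQ base/size in
 * host memory, the sync-manager monitor/SOB message base addresses and,
 * for stream 0 only, the QMAN-wide RAZWI/error reporting, arbiter
 * watchdog and protection settings.
 */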
1827static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1828 int qman_id, dma_addr_t qman_pq_addr)
1829{
1830 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1831 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1832 u32 q_off, dma_qm_offset;
1833 u32 dma_qm_err_cfg;
1834
1835 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1836
1837 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1838 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1839 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1840 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1841 so_base_en_lo = lower_32_bits(CFG_BASE +
1842 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1843 so_base_en_hi = upper_32_bits(CFG_BASE +
1844 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1845 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1846 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1847 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1848 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1849 so_base_ws_lo = lower_32_bits(CFG_BASE +
1850 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1851 so_base_ws_hi = upper_32_bits(CFG_BASE +
1852 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1853
1854 q_off = dma_qm_offset + qman_id * 4;
1855
1856 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1857 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1858
1859 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1860 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1861 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1862
1863 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1864 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1865 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1866
1867 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1868 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1869 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1870 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1871 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1872 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1873 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1874 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1875
1876 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1877
1878 /* The following configuration is needed only once per QMAN */
1879 if (qman_id == 0) {
1880 /* Configure RAZWI IRQ */
1881 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1882 if (hdev->stop_on_err) {
1883 dma_qm_err_cfg |=
1884 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1885 }
1886
1887 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1888 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1889 lower_32_bits(CFG_BASE +
1890 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1891 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1892 upper_32_bits(CFG_BASE +
1893 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1894 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1895 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1896 dma_id);
1897
1898 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1899 QM_ARB_ERR_MSG_EN_MASK);
1900
1901 /* Increase ARB WDT to support streams architecture */
1902 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1903 GAUDI_ARB_WDT_TIMEOUT);
1904
1905 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1906 QMAN_EXTERNAL_MAKE_TRUSTED);
1907
1908 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1909 }
1910}
1911
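/*
 * Configure the DMA core engine itself: maximum read outstanding/size,
 * error messages routed to the GIC, MMU bypass for the secured channel,
 * and finally enable the core.
 */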
1912static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1913{
1914 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1915 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1916
1917 /* Set to maximum possible according to physical size */
1918 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1919 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1920
1921 /* STOP_ON bit implies no completion to operation in case of RAZWI */
1922 if (hdev->stop_on_err)
1923 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1924
1925 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1926 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1927 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1928 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1929 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1930 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1931 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1932 WREG32(mmDMA0_CORE_PROT + dma_offset,
1933 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1934 /* If the channel is secured, it should be in MMU bypass mode */
1935 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1936 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1937 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1938}
1939
1940static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1941 u32 enable_mask)
1942{
1943 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1944
1945 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1946}
1947
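/*
 * Initialize all PCI DMA channels: assign a completion queue and MSI
 * vector to each of the four streams (accounting for the CPU queue and
 * NIC IRQs), program the QMANs and DMA cores, and enable them.
 */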
1948static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1949{
1950 struct gaudi_device *gaudi = hdev->asic_specific;
1951 struct hl_hw_queue *q;
1952 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1953
1954 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1955 return;
1956
1957 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1958 dma_id = gaudi_dma_assignment[i];
1959 /*
1960 * For queues after the CPU Q, we need to add 1 to get the correct
1961 * queue index. In addition, the CPU EQ and NIC IRQs must be added in
1962 * order to get the correct MSI register.
1963 */
1964 if (dma_id > 1) {
1965 cpu_skip = 1;
1966 nic_skip = NIC_NUMBER_OF_ENGINES;
1967 } else {
1968 cpu_skip = 0;
1969 nic_skip = 0;
1970 }
1971
1972 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1973 q_idx = 4 * dma_id + j + cpu_skip;
1974 q = &hdev->kernel_queues[q_idx];
1975 q->cq_id = cq_id++;
1976 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1977 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1978 q->bus_address);
1979 }
1980
1981 gaudi_init_dma_core(hdev, dma_id);
1982
1983 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1984 }
1985
1986 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1987}
1988
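/*
 * Same flow as the PCI DMA QMAN init, except that the PQs of streams 0-3
 * come from the driver's internal QMAN descriptors, while qman_id 4
 * configures the lower CP together with the QMAN-wide error reporting
 * and arbiter settings.
 */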
1989static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1990 int qman_id, u64 qman_base_addr)
1991{
1992 u32 mtr_base_lo, mtr_base_hi;
1993 u32 so_base_lo, so_base_hi;
1994 u32 q_off, dma_qm_offset;
1995 u32 dma_qm_err_cfg;
1996
1997 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1998
1999 mtr_base_lo = lower_32_bits(CFG_BASE +
2000 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2001 mtr_base_hi = upper_32_bits(CFG_BASE +
2002 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2003 so_base_lo = lower_32_bits(CFG_BASE +
2004 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2005 so_base_hi = upper_32_bits(CFG_BASE +
2006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2007
2008 q_off = dma_qm_offset + qman_id * 4;
2009
2010 if (qman_id < 4) {
2011 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2012 lower_32_bits(qman_base_addr));
2013 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2014 upper_32_bits(qman_base_addr));
2015
2016 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2017 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2018 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2019
2020 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2021 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2022 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2023 } else {
2024 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2025 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2026 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2027
2028 /* Configure RAZWI IRQ */
2029 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2030 if (hdev->stop_on_err) {
2031 dma_qm_err_cfg |=
2032 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2033 }
2034 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2035
2036 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2037 lower_32_bits(CFG_BASE +
2038 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2039 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2040 upper_32_bits(CFG_BASE +
2041 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2042 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2043 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2044 dma_id);
2045
2046 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2047 QM_ARB_ERR_MSG_EN_MASK);
2048
2049 /* Increase ARB WDT to support streams architecture */
2050 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2051 GAUDI_ARB_WDT_TIMEOUT);
2052
2053 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2054 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2055 QMAN_INTERNAL_MAKE_TRUSTED);
2056 }
2057
2058 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2059 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2060 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2061 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2062}
2063
2064static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2065{
2066 struct gaudi_device *gaudi = hdev->asic_specific;
2067 struct gaudi_internal_qman_info *q;
2068 u64 qman_base_addr;
2069 int i, j, dma_id, internal_q_index;
2070
2071 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2072 return;
2073
2074 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2075 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2076
2077 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2078 /*
2079 * Add the CPU queue in order to get the correct queue
2080 * number as all internal queues are placed after it
2081 */
2082 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2083
2084 q = &gaudi->internal_qmans[internal_q_index];
2085 qman_base_addr = (u64) q->pq_dma_addr;
2086 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2087 qman_base_addr);
2088 }
2089
2090 /* Initializing lower CP for HBM DMA QMAN */
2091 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2092
2093 gaudi_init_dma_core(hdev, dma_id);
2094
2095 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2096 }
2097
2098 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2099}
2100
2101static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2102 int qman_id, u64 qman_base_addr)
2103{
2104 u32 mtr_base_lo, mtr_base_hi;
2105 u32 so_base_lo, so_base_hi;
2106 u32 q_off, mme_id;
2107 u32 mme_qm_err_cfg;
2108
2109 mtr_base_lo = lower_32_bits(CFG_BASE +
2110 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2111 mtr_base_hi = upper_32_bits(CFG_BASE +
2112 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2113 so_base_lo = lower_32_bits(CFG_BASE +
2114 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2115 so_base_hi = upper_32_bits(CFG_BASE +
2116 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2117
2118 q_off = mme_offset + qman_id * 4;
2119
2120 if (qman_id < 4) {
2121 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2122 lower_32_bits(qman_base_addr));
2123 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2124 upper_32_bits(qman_base_addr));
2125
2126 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2127 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2128 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2129
2130 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2131 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2132 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2133 } else {
2134 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2135 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2136 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2137
2138 /* Configure RAZWI IRQ */
2139 mme_id = mme_offset /
2140 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2141
2142 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2143 if (hdev->stop_on_err) {
2144 mme_qm_err_cfg |=
2145 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2146 }
2147 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2148 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2149 lower_32_bits(CFG_BASE +
2150 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2151 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2152 upper_32_bits(CFG_BASE +
2153 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2154 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2155 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2156 mme_id);
2157
2158 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2159 QM_ARB_ERR_MSG_EN_MASK);
2160
2161 /* Increase ARB WDT to support streams architecture */
2162 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2163 GAUDI_ARB_WDT_TIMEOUT);
2164
2165 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2166 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2167 QMAN_INTERNAL_MAKE_TRUSTED);
2168 }
2169
2170 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2171 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2172 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2173 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2174}
2175
2176static void gaudi_init_mme_qmans(struct hl_device *hdev)
2177{
2178 struct gaudi_device *gaudi = hdev->asic_specific;
2179 struct gaudi_internal_qman_info *q;
2180 u64 qman_base_addr;
2181 u32 mme_offset;
2182 int i, internal_q_index;
2183
2184 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2185 return;
2186
2187 /*
2188 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2189 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2190 */
2191
2192 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2193
2194 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2195 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2196 q = &gaudi->internal_qmans[internal_q_index];
2197 qman_base_addr = (u64) q->pq_dma_addr;
2198 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2199 qman_base_addr);
2200 if (i == 3)
2201 mme_offset = 0;
2202 }
2203
2204 /* Initializing lower CP for MME QMANs */
2205 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2206 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2207 gaudi_init_mme_qman(hdev, 0, 4, 0);
2208
2209 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2210 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2211
2212 gaudi->hw_cap_initialized |= HW_CAP_MME;
2213}
2214
2215static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2216 int qman_id, u64 qman_base_addr)
2217{
2218 u32 mtr_base_lo, mtr_base_hi;
2219 u32 so_base_lo, so_base_hi;
2220 u32 q_off, tpc_id;
2221 u32 tpc_qm_err_cfg;
2222
2223 mtr_base_lo = lower_32_bits(CFG_BASE +
2224 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2225 mtr_base_hi = upper_32_bits(CFG_BASE +
2226 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2227 so_base_lo = lower_32_bits(CFG_BASE +
2228 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2229 so_base_hi = upper_32_bits(CFG_BASE +
2230 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2231
2232 q_off = tpc_offset + qman_id * 4;
2233
2234 if (qman_id < 4) {
2235 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2236 lower_32_bits(qman_base_addr));
2237 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2238 upper_32_bits(qman_base_addr));
2239
2240 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2241 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2242 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2243
2244 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2245 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2246 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2247 } else {
2248 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2249 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2250 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2251
2252 /* Configure RAZWI IRQ */
2253 tpc_id = tpc_offset /
2254 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2255
2256 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2257 if (hdev->stop_on_err) {
2258 tpc_qm_err_cfg |=
2259 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2260 }
2261
2262 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2263 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2264 lower_32_bits(CFG_BASE +
2265 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2266 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2267 upper_32_bits(CFG_BASE +
2268 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2269 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2270 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2271 tpc_id);
2272
2273 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2274 QM_ARB_ERR_MSG_EN_MASK);
2275
2276 /* Increase ARB WDT to support streams architecture */
2277 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2278 GAUDI_ARB_WDT_TIMEOUT);
2279
2280 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2281 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2282 QMAN_INTERNAL_MAKE_TRUSTED);
2283 }
2284
2285 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2286 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2287 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2288 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2289}
2290
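/*
 * Initialize the QMANs of all TPC engines: four upper-CP streams plus a
 * lower CP per TPC, enable each QMAN, program the sync-manager base
 * address per engine and mark the corresponding HW_CAP_TPC bit.
 */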
2291static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2292{
2293 struct gaudi_device *gaudi = hdev->asic_specific;
2294 struct gaudi_internal_qman_info *q;
2295 u64 qman_base_addr;
2296 u32 so_base_hi, tpc_offset = 0;
2297 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2298 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2299 int i, tpc_id, internal_q_index;
2300
2301 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2302 return;
2303
2304 so_base_hi = upper_32_bits(CFG_BASE +
2305 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2306
2307 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2308 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2309 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2310 tpc_id * QMAN_STREAMS + i;
2311 q = &gaudi->internal_qmans[internal_q_index];
2312 qman_base_addr = (u64) q->pq_dma_addr;
2313 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2314 qman_base_addr);
2315
2316 if (i == 3) {
2317 /* Initializing lower CP for TPC QMAN */
2318 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2319
2320 /* Enable the QMAN and TPC channel */
2321 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2322 QMAN_TPC_ENABLE);
2323 }
2324 }
2325
2326 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2327 so_base_hi);
2328
2329 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2330
2331 gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id);
2332 }
2333}
2334
2335static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2336{
2337 struct gaudi_device *gaudi = hdev->asic_specific;
2338
2339 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2340 return;
2341
2342 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2343 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2344 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2345}
2346
2347static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2348{
2349 struct gaudi_device *gaudi = hdev->asic_specific;
2350
2351 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2352 return;
2353
2354 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2355 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2356 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2357 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2358 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2359}
2360
2361static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2362{
2363 struct gaudi_device *gaudi = hdev->asic_specific;
2364
2365 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2366 return;
2367
2368 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2369 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2370}
2371
2372static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2373{
2374 struct gaudi_device *gaudi = hdev->asic_specific;
2375 u32 tpc_offset = 0;
2376 int tpc_id;
2377
2378 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2379 return;
2380
2381 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2382 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2383 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2384 }
2385}
2386
2387static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2388{
2389 struct gaudi_device *gaudi = hdev->asic_specific;
2390
2391 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2392 return;
2393
2394 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2395 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2396 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2397 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2398}
2399
2400static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2401{
2402 struct gaudi_device *gaudi = hdev->asic_specific;
2403
2404 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2405 return;
2406
2407 /* Stop CPs of HBM DMA QMANs */
2408
2409 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2410 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2411 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2412 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2413 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2414}
2415
2416static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2417{
2418 struct gaudi_device *gaudi = hdev->asic_specific;
2419
2420 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2421 return;
2422
2423 /* Stop CPs of MME QMANs */
2424 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2425 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2426}
2427
2428static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2429{
2430 struct gaudi_device *gaudi = hdev->asic_specific;
2431
2432 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2433 return;
2434
2435 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2436 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2437 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2438 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2439 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2440 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2441 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2442 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2443}
2444
2445static void gaudi_pci_dma_stall(struct hl_device *hdev)
2446{
2447 struct gaudi_device *gaudi = hdev->asic_specific;
2448
2449 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2450 return;
2451
2452 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2453 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2454 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2455}
2456
2457static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2458{
2459 struct gaudi_device *gaudi = hdev->asic_specific;
2460
2461 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2462 return;
2463
2464 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2465 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2466 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2467 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2468 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2469}
2470
2471static void gaudi_mme_stall(struct hl_device *hdev)
2472{
2473 struct gaudi_device *gaudi = hdev->asic_specific;
2474
2475 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2476 return;
2477
2478 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2479 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2480 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2481 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2482 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2483 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2484 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2485 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2486 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2487 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2488 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2489 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2490 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2491 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2492 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2493 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2494 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2495}
2496
2497static void gaudi_tpc_stall(struct hl_device *hdev)
2498{
2499 struct gaudi_device *gaudi = hdev->asic_specific;
2500
2501 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2502 return;
2503
2504 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2505 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2506 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2507 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2508 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2509 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2510 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2511 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2512}
2513
2514static void gaudi_set_clock_gating(struct hl_device *hdev)
2515{
2516 struct gaudi_device *gaudi = hdev->asic_specific;
2517 u32 qman_offset;
2518 bool enable;
2519 int i;
2520
2521 /* In case we are during debug session, don't enable the clock gate
2522 * as it may interfere
2523 */
2524 if (hdev->in_debug)
2525 return;
2526
2527 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2528 enable = !!(hdev->clock_gating_mask &
2529 (BIT_ULL(gaudi_dma_assignment[i])));
2530
2531 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2532 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2533 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2534 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2535 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2536 }
2537
2538 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2539 enable = !!(hdev->clock_gating_mask &
2540 (BIT_ULL(gaudi_dma_assignment[i])));
2541
2542 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2543 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2544 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2545 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2546 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2547 }
2548
2549 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2550 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2551 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2552
2553 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2554 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2555 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2556
2557 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2558 enable = !!(hdev->clock_gating_mask &
2559 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2560
2561 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2562 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2563 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2564 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2565
2566 qman_offset += TPC_QMAN_OFFSET;
2567 }
2568
2569 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2570}
2571
2572static void gaudi_disable_clock_gating(struct hl_device *hdev)
2573{
2574 struct gaudi_device *gaudi = hdev->asic_specific;
2575 u32 qman_offset;
2576 int i;
2577
2578 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2579 return;
2580
2581 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2582 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2583 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2584
2585 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2586 }
2587
2588 WREG32(mmMME0_QM_CGM_CFG, 0);
2589 WREG32(mmMME0_QM_CGM_CFG1, 0);
2590 WREG32(mmMME2_QM_CGM_CFG, 0);
2591 WREG32(mmMME2_QM_CGM_CFG1, 0);
2592
2593 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2594 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2595 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2596
2597 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2598 }
2599
2600 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2601}
2602
2603static void gaudi_enable_timestamp(struct hl_device *hdev)
2604{
2605 /* Disable the timestamp counter */
2606 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2607
2608 /* Zero the lower/upper parts of the 64-bit counter */
2609 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2610 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2611
2612 /* Enable the counter */
2613 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2614}
2615
2616static void gaudi_disable_timestamp(struct hl_device *hdev)
2617{
2618 /* Disable the timestamp counter */
2619 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2620}
2621
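/*
 * Halt order: stop the QMAN command processors first, disable clock
 * gating, stall the DMA/TPC/MME engine cores, and only then disable the
 * QMANs, the timestamp counter and MSI.
 */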
2622static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2623{
2624 u32 wait_timeout_ms;
2625
2626 dev_info(hdev->dev,
2627 "Halting compute engines and disabling interrupts\n");
2628
2629 if (hdev->pldm)
2630 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2631 else
2632 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2633
2634
2635 gaudi_stop_mme_qmans(hdev);
2636 gaudi_stop_tpc_qmans(hdev);
2637 gaudi_stop_hbm_dma_qmans(hdev);
2638 gaudi_stop_pci_dma_qmans(hdev);
2639
2640 hdev->asic_funcs->disable_clock_gating(hdev);
2641
2642 msleep(wait_timeout_ms);
2643
2644 gaudi_pci_dma_stall(hdev);
2645 gaudi_hbm_dma_stall(hdev);
2646 gaudi_tpc_stall(hdev);
2647 gaudi_mme_stall(hdev);
2648
2649 msleep(wait_timeout_ms);
2650
2651 gaudi_disable_mme_qmans(hdev);
2652 gaudi_disable_tpc_qmans(hdev);
2653 gaudi_disable_hbm_dma_qmans(hdev);
2654 gaudi_disable_pci_dma_qmans(hdev);
2655
2656 gaudi_disable_timestamp(hdev);
2657
2658 gaudi_disable_msi(hdev);
2659}
2660
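/*
 * MMU init: program the hop-0 page-table address for every ASID, set the
 * cache-management page used for invalidations, flush the MMU cache and
 * enable the MMU with its SPI mask and hop configuration.
 */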
2661static int gaudi_mmu_init(struct hl_device *hdev)
2662{
2663 struct asic_fixed_properties *prop = &hdev->asic_prop;
2664 struct gaudi_device *gaudi = hdev->asic_specific;
2665 u64 hop0_addr;
2666 int rc, i;
2667
2668 if (!hdev->mmu_enable)
2669 return 0;
2670
2671 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2672 return 0;
2673
2674 hdev->dram_supports_virtual_memory = false;
2675
2676 for (i = 0 ; i < prop->max_asid ; i++) {
2677 hop0_addr = prop->mmu_pgt_addr +
2678 (i * prop->mmu_hop_table_size);
2679
2680 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2681 if (rc) {
2682 dev_err(hdev->dev,
2683 "failed to set hop0 addr for asid %d\n", i);
2684 goto err;
2685 }
2686 }
2687
2688 /* init MMU cache manage page */
2689 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2690 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2691
2692 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2693
2694 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2695 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2696
2697 WREG32(mmSTLB_HOP_CONFIGURATION,
2698 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2699
2700 /*
2701 * The H/W expects the first PI after init to be 1. After wraparound
2702 * we'll write 0.
2703 */
2704 gaudi->mmu_cache_inv_pi = 1;
2705
2706 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2707
2708 return 0;
2709
2710err:
2711 return rc;
2712}
2713
2714static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2715{
2716 void __iomem *dst;
2717
2718 /* HBM scrambler must be initialized before pushing F/W to HBM */
2719 gaudi_init_scrambler_hbm(hdev);
2720
2721 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2722
2723 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2724}
2725
2726static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2727{
2728 void __iomem *dst;
2729
2730 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2731
2732 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2733}
2734
2735static void gaudi_read_device_fw_version(struct hl_device *hdev,
2736 enum hl_fw_component fwc)
2737{
2738 const char *name;
2739 u32 ver_off;
2740 char *dest;
2741
2742 switch (fwc) {
2743 case FW_COMP_UBOOT:
2744 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2745 dest = hdev->asic_prop.uboot_ver;
2746 name = "U-Boot";
2747 break;
2748 case FW_COMP_PREBOOT:
2749 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2750 dest = hdev->asic_prop.preboot_ver;
2751 name = "Preboot";
2752 break;
2753 default:
2754 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2755 return;
2756 }
2757
2758 ver_off &= ~((u32)SRAM_BASE_ADDR);
2759
2760 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2761 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2762 VERSION_MAX_LEN);
2763 } else {
2764 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2765 name, ver_off);
2766 strcpy(dest, "unavailable");
2767 }
2768}
2769
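/*
 * Bring up the embedded CPU: extend its PCI address space from 40 to 50
 * bits and let the common firmware loader boot it, honoring the BMC
 * setting and the boot-fit/CPU timeouts.
 */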
2770static int gaudi_init_cpu(struct hl_device *hdev)
2771{
2772 struct gaudi_device *gaudi = hdev->asic_specific;
2773 int rc;
2774
2775 if (!hdev->cpu_enable)
2776 return 0;
2777
2778 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2779 return 0;
2780
2781 /*
2782 * The device CPU works with 40 bits addresses.
2783 * This register sets the extension to 50 bits.
2784 */
2785 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2786
2787 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2788 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2789 mmCPU_CMD_STATUS_TO_HOST,
2790 mmCPU_BOOT_ERR0,
2791 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2792 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2793
2794 if (rc)
2795 return rc;
2796
2797 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2798
2799 return 0;
2800}
2801
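/*
 * Hand the embedded CPU the bus addresses and sizes of the CPU PQ, event
 * queue and CPU-accessible memory region, kick it through the GIC and
 * wait for the PQ_INIT_STATUS_READY_FOR_HOST acknowledgment.
 */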
2802static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2803{
2804 struct gaudi_device *gaudi = hdev->asic_specific;
2805 struct hl_eq *eq;
2806 u32 status;
2807 struct hl_hw_queue *cpu_pq =
2808 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2809 int err;
2810
2811 if (!hdev->cpu_queues_enable)
2812 return 0;
2813
2814 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2815 return 0;
2816
2817 eq = &hdev->event_queue;
2818
2819 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2820 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2821
2822 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2823 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2824
2825 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2826 lower_32_bits(hdev->cpu_accessible_dma_address));
2827 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2828 upper_32_bits(hdev->cpu_accessible_dma_address));
2829
2830 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2831 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2832 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2833
2834 /* Used for EQ CI */
2835 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2836
2837 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2838
2839 if (gaudi->multi_msi_mode)
2840 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2841 else
2842 WREG32(mmCPU_IF_QUEUE_INIT,
2843 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2844
2845 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2846
2847 err = hl_poll_timeout(
2848 hdev,
2849 mmCPU_IF_QUEUE_INIT,
2850 status,
2851 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2852 1000,
2853 cpu_timeout);
2854
2855 if (err) {
2856 dev_err(hdev->dev,
2857 "Failed to communicate with ARM CPU (ArmCP timeout)\n");
2858 return -EIO;
2859 }
2860
2861 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2862 return 0;
2863}
2864
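/*
 * Early H/W setup: mark the device state as dirty, secure the PCI BAR
 * access path, configure or disable the AXI drain according to
 * hdev->axi_drain, and program the soft/hard reset unit masks as early
 * as possible.
 */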
2865static void gaudi_pre_hw_init(struct hl_device *hdev)
2866{
2867 /* Perform read from the device to make sure device is up */
2868 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2869
2870 /*
2871 * Let's mark in the H/W that we have reached this point. We check
2872 * this value in the reset_before_init function to understand whether
2873 * we need to reset the chip before doing H/W init. This register is
2874 * cleared by the H/W upon H/W reset
2875 */
2876 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2877
2878 /* Set the access through PCI bars (Linux driver only) as secured */
2879 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2880 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2881
2882 /* Perform read to flush the waiting writes to ensure configuration
2883 * was set in the device
2884 */
2885 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2886
2887 if (hdev->axi_drain) {
2888 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2889 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2890 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2891 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2892
2893 /* Perform read to flush the DRAIN cfg */
2894 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2895 } else {
2896 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2897 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2898
2899 /* Perform read to flush the DRAIN cfg */
2900 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2901 }
2902
2903 /* Configure the reset registers. Must be done as early as possible
2904 * in case we fail during H/W initialization
2905 */
2906 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2907 (CFG_RST_H_DMA_MASK |
2908 CFG_RST_H_MME_MASK |
2909 CFG_RST_H_SM_MASK |
2910 CFG_RST_H_TPC_MASK));
2911
2912 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2913
2914 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2915 (CFG_RST_H_HBM_MASK |
2916 CFG_RST_H_TPC_MASK |
2917 CFG_RST_H_NIC_MASK |
2918 CFG_RST_H_SM_MASK |
2919 CFG_RST_H_DMA_MASK |
2920 CFG_RST_H_MME_MASK |
2921 CFG_RST_H_CPU_MASK |
2922 CFG_RST_H_MMU_MASK));
2923
2924 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2925 (CFG_RST_L_IF_MASK |
2926 CFG_RST_L_PSOC_MASK |
2927 CFG_RST_L_TPC_MASK));
2928}
2929
2930static int gaudi_hw_init(struct hl_device *hdev)
2931{
2932 int rc;
2933
2934 dev_info(hdev->dev, "Starting initialization of H/W\n");
2935
2936 gaudi_pre_hw_init(hdev);
2937
2938 gaudi_init_pci_dma_qmans(hdev);
2939
2940 gaudi_init_hbm_dma_qmans(hdev);
2941
2942 rc = gaudi_init_cpu(hdev);
2943 if (rc) {
2944 dev_err(hdev->dev, "failed to initialize CPU\n");
2945 return rc;
2946 }
2947
2948 /* SRAM scrambler must be initialized after CPU is running from HBM */
2949 gaudi_init_scrambler_sram(hdev);
2950
2951 /* This is here just in case we are working without CPU */
2952 gaudi_init_scrambler_hbm(hdev);
2953
2954 gaudi_init_golden_registers(hdev);
2955
2956 rc = gaudi_mmu_init(hdev);
2957 if (rc)
2958 return rc;
2959
2960 gaudi_init_security(hdev);
2961
2962 gaudi_init_mme_qmans(hdev);
2963
2964 gaudi_init_tpc_qmans(hdev);
2965
2966 hdev->asic_funcs->set_clock_gating(hdev);
2967
2968 gaudi_enable_timestamp(hdev);
2969
2970 /* MSI must be enabled before CPU queues are initialized */
2971 rc = gaudi_enable_msi(hdev);
2972 if (rc)
2973 goto disable_queues;
2974
2975 /* must be called after MSI was enabled */
2976 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2977 if (rc) {
2978 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2979 rc);
2980 goto disable_msi;
2981 }
2982
2983 /* Perform read from the device to flush all configuration */
2984 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2985
2986 return 0;
2987
2988disable_msi:
2989 gaudi_disable_msi(hdev);
2990disable_queues:
2991 gaudi_disable_mme_qmans(hdev);
2992 gaudi_disable_pci_dma_qmans(hdev);
2993
2994 return rc;
2995}
2996
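/*
 * Hard reset flow (soft reset is not supported): put the device CPU into
 * WFE, prevent PCIe re-initialization, apply the boot-strap read WA,
 * re-arm the boot loader, assert SW_ALL_RST and wait for the reset to
 * complete before clearing the H/W capability flags.
 */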
2997static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
2998{
2999 struct gaudi_device *gaudi = hdev->asic_specific;
3000 u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3001
3002 if (!hard_reset) {
3003 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3004 return;
3005 }
3006
3007 if (hdev->pldm) {
3008 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3009 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3010 } else {
3011 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3012 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3013 }
3014
3015 /* Set device to handle FLR by H/W as we will put the device CPU to
3016 * halt mode
3017 */
3018 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3019 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3020
3021 /* The state of the CPU is unknown at this point, so make sure it is
3022 * stopped by any means necessary
3023 */
3024 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3025 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3026
3027 msleep(cpu_timeout_ms);
3028
3029 /* Tell ASIC not to re-initialize PCIe */
3030 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3031
3032 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3033
3034 /* H/W bug WA:
3035 * rdata[31:0] = strap_read_val;
3036 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3037 */
3038 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3039 (boot_strap & 0x001FFFFF));
3040 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3041
3042 /* Restart BTL/BLR upon hard-reset */
3043 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3044
3045 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3046 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3047 dev_info(hdev->dev,
3048 "Issued HARD reset command, going to wait %dms\n",
3049 reset_timeout_ms);
3050
3051 /*
3052 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3053 * itself is in reset. Need to wait until the reset is deasserted
3054 */
3055 msleep(reset_timeout_ms);
3056
3057 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3058 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3059 dev_err(hdev->dev,
3060 "Timeout while waiting for device to reset 0x%x\n",
3061 status);
3062
3063 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3064
3065 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3066 HW_CAP_HBM | HW_CAP_PCI_DMA |
3067 HW_CAP_MME | HW_CAP_TPC_MASK |
3068 HW_CAP_HBM_DMA | HW_CAP_PLL |
3069 HW_CAP_MMU |
3070 HW_CAP_SRAM_SCRAMBLER |
3071 HW_CAP_HBM_SCRAMBLER |
3072 HW_CAP_CLK_GATE);
3073
3074 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3075}
3076
3077static int gaudi_suspend(struct hl_device *hdev)
3078{
3079 int rc;
3080
3081 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
3082 if (rc)
3083 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3084
3085 return rc;
3086}
3087
3088static int gaudi_resume(struct hl_device *hdev)
3089{
3090 return gaudi_init_iatu(hdev);
3091}
3092
3093static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3094 u64 kaddress, phys_addr_t paddress, u32 size)
3095{
3096 int rc;
3097
3098 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3099 VM_DONTCOPY | VM_NORESERVE;
3100
3101 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
3102 size, vma->vm_page_prot);
3103 if (rc)
3104 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
3105
3106 return rc;
3107}
3108
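/*
 * Map the driver's queue ID to the producer-index doorbell register of
 * the owning QMAN (or to the CPU PQ PI register for the CPU queue)
 * before the new PI value is written.
 */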
3109static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3110{
3111 struct gaudi_device *gaudi = hdev->asic_specific;
3112 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3113 int dma_id;
3114 bool invalid_queue = false;
3115
3116 switch (hw_queue_id) {
3117 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3118 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3119 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3120 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3121 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3122 break;
3123
3124 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3125 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3126 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3127 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3128 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3129 break;
3130
3131 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3132 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3133 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3134 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3135 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3136 break;
3137
3138 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3139 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3140 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3142 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143 break;
3144
3145 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3146 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3147 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3149 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150 break;
3151
3152 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3153 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3154 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157 break;
3158
3159 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3160 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3161 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164 break;
3165
3166 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3167 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3168 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171 break;
3172
3173 case GAUDI_QUEUE_ID_CPU_PQ:
3174 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3175 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3176 else
3177 invalid_queue = true;
3178 break;
3179
3180 case GAUDI_QUEUE_ID_MME_0_0:
3181 db_reg_offset = mmMME2_QM_PQ_PI_0;
3182 break;
3183
3184 case GAUDI_QUEUE_ID_MME_0_1:
3185 db_reg_offset = mmMME2_QM_PQ_PI_1;
3186 break;
3187
3188 case GAUDI_QUEUE_ID_MME_0_2:
3189 db_reg_offset = mmMME2_QM_PQ_PI_2;
3190 break;
3191
3192 case GAUDI_QUEUE_ID_MME_0_3:
3193 db_reg_offset = mmMME2_QM_PQ_PI_3;
3194 break;
3195
3196 case GAUDI_QUEUE_ID_MME_1_0:
3197 db_reg_offset = mmMME0_QM_PQ_PI_0;
3198 break;
3199
3200 case GAUDI_QUEUE_ID_MME_1_1:
3201 db_reg_offset = mmMME0_QM_PQ_PI_1;
3202 break;
3203
3204 case GAUDI_QUEUE_ID_MME_1_2:
3205 db_reg_offset = mmMME0_QM_PQ_PI_2;
3206 break;
3207
3208 case GAUDI_QUEUE_ID_MME_1_3:
3209 db_reg_offset = mmMME0_QM_PQ_PI_3;
3210 break;
3211
3212 case GAUDI_QUEUE_ID_TPC_0_0:
3213 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3214 break;
3215
3216 case GAUDI_QUEUE_ID_TPC_0_1:
3217 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3218 break;
3219
3220 case GAUDI_QUEUE_ID_TPC_0_2:
3221 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3222 break;
3223
3224 case GAUDI_QUEUE_ID_TPC_0_3:
3225 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3226 break;
3227
3228 case GAUDI_QUEUE_ID_TPC_1_0:
3229 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3230 break;
3231
3232 case GAUDI_QUEUE_ID_TPC_1_1:
3233 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3234 break;
3235
3236 case GAUDI_QUEUE_ID_TPC_1_2:
3237 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3238 break;
3239
3240 case GAUDI_QUEUE_ID_TPC_1_3:
3241 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3242 break;
3243
3244 case GAUDI_QUEUE_ID_TPC_2_0:
3245 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3246 break;
3247
3248 case GAUDI_QUEUE_ID_TPC_2_1:
3249 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3250 break;
3251
3252 case GAUDI_QUEUE_ID_TPC_2_2:
3253 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3254 break;
3255
3256 case GAUDI_QUEUE_ID_TPC_2_3:
3257 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3258 break;
3259
3260 case GAUDI_QUEUE_ID_TPC_3_0:
3261 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3262 break;
3263
3264 case GAUDI_QUEUE_ID_TPC_3_1:
3265 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3266 break;
3267
3268 case GAUDI_QUEUE_ID_TPC_3_2:
3269 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3270 break;
3271
3272 case GAUDI_QUEUE_ID_TPC_3_3:
3273 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3274 break;
3275
3276 case GAUDI_QUEUE_ID_TPC_4_0:
3277 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3278 break;
3279
3280 case GAUDI_QUEUE_ID_TPC_4_1:
3281 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3282 break;
3283
3284 case GAUDI_QUEUE_ID_TPC_4_2:
3285 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3286 break;
3287
3288 case GAUDI_QUEUE_ID_TPC_4_3:
3289 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3290 break;
3291
3292 case GAUDI_QUEUE_ID_TPC_5_0:
3293 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3294 break;
3295
3296 case GAUDI_QUEUE_ID_TPC_5_1:
3297 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3298 break;
3299
3300 case GAUDI_QUEUE_ID_TPC_5_2:
3301 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3302 break;
3303
3304 case GAUDI_QUEUE_ID_TPC_5_3:
3305 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3306 break;
3307
3308 case GAUDI_QUEUE_ID_TPC_6_0:
3309 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3310 break;
3311
3312 case GAUDI_QUEUE_ID_TPC_6_1:
3313 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3314 break;
3315
3316 case GAUDI_QUEUE_ID_TPC_6_2:
3317 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3318 break;
3319
3320 case GAUDI_QUEUE_ID_TPC_6_3:
3321 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3322 break;
3323
3324 case GAUDI_QUEUE_ID_TPC_7_0:
3325 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3326 break;
3327
3328 case GAUDI_QUEUE_ID_TPC_7_1:
3329 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3330 break;
3331
3332 case GAUDI_QUEUE_ID_TPC_7_2:
3333 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3334 break;
3335
3336 case GAUDI_QUEUE_ID_TPC_7_3:
3337 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3338 break;
3339
3340 default:
3341 invalid_queue = true;
3342 }
3343
3344 if (invalid_queue) {
3345 /* Should never get here */
3346 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3347 hw_queue_id);
3348 return;
3349 }
3350
3351 db_value = pi;
3352
3353 /* ring the doorbell */
3354 WREG32(db_reg_offset, db_value);
3355
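	/*
	 * The CPU queue is consumed by the embedded CPU (ArmCP firmware), so
	 * in addition to updating the PI we raise an interrupt through the
	 * GIC to signal that a new entry is pending.
	 */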
3356 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3357 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3358 GAUDI_EVENT_PI_UPDATE);
3359}
3360
3361static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3362 struct hl_bd *bd)
3363{
3364 __le64 *pbd = (__le64 *) bd;
3365
3366 /* The QMANs are on host memory so a simple copy suffices */
3367 pqe[0] = pbd[0];
3368 pqe[1] = pbd[1];
3369}
3370
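/*
 * Host memory is presented to the device at an offset of HOST_PHYS_BASE, so
 * every DMA address handed to the hardware is the CPU-side DMA address plus
 * that base, and the free/unmap paths subtract it again. As an illustrative
 * sketch (the address is made up): a buffer mapped at bus address 0x1000 is
 * programmed into descriptors as HOST_PHYS_BASE + 0x1000.
 */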
3371static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3372 dma_addr_t *dma_handle, gfp_t flags)
3373{
3374 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3375 dma_handle, flags);
3376
3377 /* Shift to the device's base physical address of host memory */
3378 if (kernel_addr)
3379 *dma_handle += HOST_PHYS_BASE;
3380
3381 return kernel_addr;
3382}
3383
3384static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3385 void *cpu_addr, dma_addr_t dma_handle)
3386{
3387 /* Cancel the device's base physical address of host memory */
3388 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3389
3390 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3391}
3392
3393static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3394 u32 queue_id, dma_addr_t *dma_handle,
3395 u16 *queue_len)
3396{
3397 struct gaudi_device *gaudi = hdev->asic_specific;
3398 struct gaudi_internal_qman_info *q;
3399
3400 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3401 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3402 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3403 return NULL;
3404 }
3405
3406 q = &gaudi->internal_qmans[queue_id];
3407 *dma_handle = q->pq_dma_addr;
3408 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3409
3410 return q->pq_kernel_addr;
3411}
3412
3413static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3414 u16 len, u32 timeout, long *result)
3415{
3416 struct gaudi_device *gaudi = hdev->asic_specific;
3417
3418 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3419 if (result)
3420 *result = 0;
3421 return 0;
3422 }
3423
3424 if (!timeout)
3425 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3426
3427 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3428 timeout, result);
3429}
3430
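/*
 * Queue self-test: allocate a scratch DWORD in host memory, submit a single
 * MSG_PROT packet that writes GAUDI_QMAN0_FENCE_VAL to it, and poll the
 * scratch location until the value lands or the timeout expires.
 */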
3431static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3432{
3433 struct packet_msg_prot *fence_pkt;
3434 dma_addr_t pkt_dma_addr;
3435 u32 fence_val, tmp, timeout_usec;
3436 dma_addr_t fence_dma_addr;
3437 u32 *fence_ptr;
3438 int rc;
3439
3440 if (hdev->pldm)
3441 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3442 else
3443 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3444
3445 fence_val = GAUDI_QMAN0_FENCE_VAL;
3446
3447 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3448 &fence_dma_addr);
3449 if (!fence_ptr) {
3450 dev_err(hdev->dev,
3451 "Failed to allocate memory for queue testing\n");
3452 return -ENOMEM;
3453 }
3454
3455 *fence_ptr = 0;
3456
3457 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3458 sizeof(struct packet_msg_prot),
3459 GFP_KERNEL, &pkt_dma_addr);
3460 if (!fence_pkt) {
3461 dev_err(hdev->dev,
3462 "Failed to allocate packet for queue testing\n");
3463 rc = -ENOMEM;
3464 goto free_fence_ptr;
3465 }
3466
3467 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
3468 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
3469 (1 << GAUDI_PKT_CTL_MB_SHIFT);
3470 fence_pkt->ctl = cpu_to_le32(tmp);
3471 fence_pkt->value = cpu_to_le32(fence_val);
3472 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3473
3474 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3475 sizeof(struct packet_msg_prot),
3476 pkt_dma_addr);
3477 if (rc) {
3478 dev_err(hdev->dev,
3479 "Failed to send fence packet\n");
3480 goto free_pkt;
3481 }
3482
3483 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3484 1000, timeout_usec, true);
3485
3486 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3487
3488 if (rc == -ETIMEDOUT) {
3489 dev_err(hdev->dev,
3490 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3491 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3492 rc = -EIO;
3493 }
3494
3495free_pkt:
3496 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3497 pkt_dma_addr);
3498free_fence_ptr:
3499 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3500 fence_dma_addr);
3501 return rc;
3502}
3503
3504static int gaudi_test_cpu_queue(struct hl_device *hdev)
3505{
3506 struct gaudi_device *gaudi = hdev->asic_specific;
3507
3508 /*
3509 * check capability here as send_cpu_message() won't update the result
3510 * value if no capability
3511 */
3512 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3513 return 0;
3514
3515 return hl_fw_test_cpu_queue(hdev);
3516}
3517
3518static int gaudi_test_queues(struct hl_device *hdev)
3519{
3520 int i, rc, ret_val = 0;
3521
3522 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3523 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3524 rc = gaudi_test_queue(hdev, i);
3525 if (rc)
3526 ret_val = -EINVAL;
3527 }
3528 }
3529
3530 rc = gaudi_test_cpu_queue(hdev);
3531 if (rc)
3532 ret_val = -EINVAL;
3533
3534 return ret_val;
3535}
3536
3537static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3538 gfp_t mem_flags, dma_addr_t *dma_handle)
3539{
3540 void *kernel_addr;
3541
3542 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3543 return NULL;
3544
3545 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3546
3547 /* Shift to the device's base physical address of host memory */
3548 if (kernel_addr)
3549 *dma_handle += HOST_PHYS_BASE;
3550
3551 return kernel_addr;
3552}
3553
3554static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3555 dma_addr_t dma_addr)
3556{
3557 /* Cancel the device's base physical address of host memory */
3558 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3559
3560 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3561}
3562
3563static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3564 size_t size, dma_addr_t *dma_handle)
3565{
3566 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3567}
3568
3569static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3570 size_t size, void *vaddr)
3571{
3572 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3573}
3574
3575static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3576 int nents, enum dma_data_direction dir)
3577{
3578 struct scatterlist *sg;
3579 int i;
3580
3581 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3582 return -ENOMEM;
3583
3584 /* Shift to the device's base physical address of host memory */
3585 for_each_sg(sgl, sg, nents, i)
3586 sg->dma_address += HOST_PHYS_BASE;
3587
3588 return 0;
3589}
3590
3591static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3592 int nents, enum dma_data_direction dir)
3593{
3594 struct scatterlist *sg;
3595 int i;
3596
3597 /* Cancel the device's base physical address of host memory */
3598 for_each_sg(sgl, sg, nents, i)
3599 sg->dma_address -= HOST_PHYS_BASE;
3600
3601 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3602}
3603
3604static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3605 struct sg_table *sgt)
3606{
3607 struct scatterlist *sg, *sg_next_iter;
3608 u32 count, dma_desc_cnt;
3609 u64 len, len_next;
3610 dma_addr_t addr, addr_next;
3611
3612 dma_desc_cnt = 0;
3613
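	/*
	 * Count how many LIN_DMA descriptors the transfer needs: physically
	 * contiguous SG entries are coalesced as long as the combined length
	 * stays within DMA_MAX_TRANSFER_SIZE, and each resulting chunk costs
	 * one descriptor.
	 */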
3614 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3615
3616 len = sg_dma_len(sg);
3617 addr = sg_dma_address(sg);
3618
3619 if (len == 0)
3620 break;
3621
3622 while ((count + 1) < sgt->nents) {
3623 sg_next_iter = sg_next(sg);
3624 len_next = sg_dma_len(sg_next_iter);
3625 addr_next = sg_dma_address(sg_next_iter);
3626
3627 if (len_next == 0)
3628 break;
3629
3630 if ((addr + len == addr_next) &&
3631 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3632 len += len_next;
3633 count++;
3634 sg = sg_next_iter;
3635 } else {
3636 break;
3637 }
3638 }
3639
3640 dma_desc_cnt++;
3641 }
3642
3643 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3644}
3645
3646static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3647 struct hl_cs_parser *parser,
3648 struct packet_lin_dma *user_dma_pkt,
3649 u64 addr, enum dma_data_direction dir)
3650{
3651 struct hl_userptr *userptr;
3652 int rc;
3653
3654 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3655 parser->job_userptr_list, &userptr))
3656 goto already_pinned;
3657
3658 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3659 if (!userptr)
3660 return -ENOMEM;
3661
3662 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3663 userptr);
3664 if (rc)
3665 goto free_userptr;
3666
3667 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3668
3669 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3670 userptr->sgt->nents, dir);
3671 if (rc) {
3672 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3673 goto unpin_memory;
3674 }
3675
3676 userptr->dma_mapped = true;
3677 userptr->dir = dir;
3678
3679already_pinned:
3680 parser->patched_cb_size +=
3681 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3682
3683 return 0;
3684
3685unpin_memory:
3686 hl_unpin_host_memory(hdev, userptr);
3687free_userptr:
3688 kfree(userptr);
3689 return rc;
3690}
3691
3692static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3693 struct hl_cs_parser *parser,
3694 struct packet_lin_dma *user_dma_pkt,
3695 bool src_in_host)
3696{
3697 enum dma_data_direction dir;
3698 bool skip_host_mem_pin = false, user_memset;
3699 u64 addr;
3700 int rc = 0;
3701
3702 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3703 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3704 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3705
3706 if (src_in_host) {
3707 if (user_memset)
3708 skip_host_mem_pin = true;
3709
3710 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3711 dir = DMA_TO_DEVICE;
3712 addr = le64_to_cpu(user_dma_pkt->src_addr);
3713 } else {
3714 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3715 dir = DMA_FROM_DEVICE;
3716 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3717 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3718 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3719 }
3720
3721 if (skip_host_mem_pin)
3722 parser->patched_cb_size += sizeof(*user_dma_pkt);
3723 else
3724 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3725 addr, dir);
3726
3727 return rc;
3728}
3729
3730static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3731 struct hl_cs_parser *parser,
3732 struct packet_lin_dma *user_dma_pkt)
3733{
3734 bool src_in_host = false;
3735 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3736 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3737 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3738
3739 dev_dbg(hdev->dev, "DMA packet details:\n");
3740 dev_dbg(hdev->dev, "source == 0x%llx\n",
3741 le64_to_cpu(user_dma_pkt->src_addr));
3742 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3743 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3744
3745 /*
3746 * Special handling for DMA with size 0. Bypass all validations
3747 * because no transactions will be done except for WR_COMP, which
3748 * is not a security issue
3749 */
3750 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3751 parser->patched_cb_size += sizeof(*user_dma_pkt);
3752 return 0;
3753 }
3754
3755 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3756 src_in_host = true;
3757
3758 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3759 src_in_host);
3760}
3761
3762static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3763 struct hl_cs_parser *parser,
3764 struct packet_load_and_exe *user_pkt)
3765{
3766 u32 cfg;
3767
3768 cfg = le32_to_cpu(user_pkt->cfg);
3769
3770 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3771 dev_err(hdev->dev,
3772 "User not allowed to use Load and Execute\n");
3773 return -EPERM;
3774 }
3775
3776 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3777
3778 return 0;
3779}
3780
3781static int gaudi_validate_cb(struct hl_device *hdev,
3782 struct hl_cs_parser *parser, bool is_mmu)
3783{
3784 u32 cb_parsed_length = 0;
3785 int rc = 0;
3786
3787 parser->patched_cb_size = 0;
3788
3789 /* user_cb_size is greater than 0, so the loop always executes */
3790 while (cb_parsed_length < parser->user_cb_size) {
3791 enum packet_id pkt_id;
3792 u16 pkt_size;
3793 struct gaudi_packet *user_pkt;
3794
3795 user_pkt = (struct gaudi_packet *) (uintptr_t)
3796 (parser->user_cb->kernel_address + cb_parsed_length);
3797
3798 pkt_id = (enum packet_id) (
3799 (le64_to_cpu(user_pkt->header) &
3800 PACKET_HEADER_PACKET_ID_MASK) >>
3801 PACKET_HEADER_PACKET_ID_SHIFT);
3802
3803 if (!validate_packet_id(pkt_id)) {
3804 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3805 rc = -EINVAL;
3806 break;
3807 }
3808
3809 pkt_size = gaudi_packet_sizes[pkt_id];
3810 cb_parsed_length += pkt_size;
3811 if (cb_parsed_length > parser->user_cb_size) {
3812 dev_err(hdev->dev,
3813 "packet 0x%x is out of CB boundary\n", pkt_id);
3814 rc = -EINVAL;
3815 break;
3816 }
3817
3818 switch (pkt_id) {
3819 case PACKET_MSG_PROT:
3820 dev_err(hdev->dev,
3821 "User not allowed to use MSG_PROT\n");
3822 rc = -EPERM;
3823 break;
3824
3825 case PACKET_CP_DMA:
3826 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3827 rc = -EPERM;
3828 break;
3829
3830 case PACKET_STOP:
3831 dev_err(hdev->dev, "User not allowed to use STOP\n");
3832 rc = -EPERM;
3833 break;
3834
3835 case PACKET_WREG_BULK:
3836 dev_err(hdev->dev,
3837 "User not allowed to use WREG_BULK\n");
3838 rc = -EPERM;
3839 break;
3840
3841 case PACKET_LOAD_AND_EXE:
3842 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3843 (struct packet_load_and_exe *) user_pkt);
3844 break;
3845
3846 case PACKET_LIN_DMA:
3847 parser->contains_dma_pkt = true;
3848 if (is_mmu)
3849 parser->patched_cb_size += pkt_size;
3850 else
3851 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3852 (struct packet_lin_dma *) user_pkt);
3853 break;
3854
3855 case PACKET_WREG_32:
3856 case PACKET_MSG_LONG:
3857 case PACKET_MSG_SHORT:
3858 case PACKET_REPEAT:
3859 case PACKET_FENCE:
3860 case PACKET_NOP:
3861 case PACKET_ARB_POINT:
3862 parser->patched_cb_size += pkt_size;
3863 break;
3864
3865 default:
3866 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3867 pkt_id);
3868 rc = -EINVAL;
3869 break;
3870 }
3871
3872 if (rc)
3873 break;
3874 }
3875
3876 /*
3877 * The new CB should have space at the end for two MSG_PROT packets:
3878 * 1. A packet that will act as a completion packet
3879 * 2. A packet that will generate MSI-X interrupt
3880 */
3881 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3882
3883 return rc;
3884}
3885
3886static int gaudi_patch_dma_packet(struct hl_device *hdev,
3887 struct hl_cs_parser *parser,
3888 struct packet_lin_dma *user_dma_pkt,
3889 struct packet_lin_dma *new_dma_pkt,
3890 u32 *new_dma_pkt_size)
3891{
3892 struct hl_userptr *userptr;
3893 struct scatterlist *sg, *sg_next_iter;
3894 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3895 u64 len, len_next;
3896 dma_addr_t dma_addr, dma_addr_next;
3897 u64 device_memory_addr, addr;
3898 enum dma_data_direction dir;
3899 struct sg_table *sgt;
3900 bool src_in_host = false;
3901 bool skip_host_mem_pin = false;
3902 bool user_memset;
3903
3904 ctl = le32_to_cpu(user_dma_pkt->ctl);
3905
3906 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3907 src_in_host = true;
3908
3909 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3910 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3911
3912 if (src_in_host) {
3913 addr = le64_to_cpu(user_dma_pkt->src_addr);
3914 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3915 dir = DMA_TO_DEVICE;
3916 if (user_memset)
3917 skip_host_mem_pin = true;
3918 } else {
3919 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3920 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3921 dir = DMA_FROM_DEVICE;
3922 }
3923
3924 if ((!skip_host_mem_pin) &&
3925 (!hl_userptr_is_pinned(hdev, addr,
3926 le32_to_cpu(user_dma_pkt->tsize),
3927 parser->job_userptr_list, &userptr))) {
3928 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3929 addr, le32_to_cpu(user_dma_pkt->tsize));
3930 return -EFAULT;
3931 }
3932
3933 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3934 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3935 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3936 return 0;
3937 }
3938
3939 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3940
3941 sgt = userptr->sgt;
3942 dma_desc_cnt = 0;
3943
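	/*
	 * Expand the user's single LIN_DMA packet into one packet per
	 * (coalesced) SG chunk. The engine barrier is kept only on the first
	 * packet and write-completion is stripped from all of them; the
	 * user's WR_COMP setting is restored on the last packet below.
	 */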
3944 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3945 len = sg_dma_len(sg);
3946 dma_addr = sg_dma_address(sg);
3947
3948 if (len == 0)
3949 break;
3950
3951 while ((count + 1) < sgt->nents) {
3952 sg_next_iter = sg_next(sg);
3953 len_next = sg_dma_len(sg_next_iter);
3954 dma_addr_next = sg_dma_address(sg_next_iter);
3955
3956 if (len_next == 0)
3957 break;
3958
3959 if ((dma_addr + len == dma_addr_next) &&
3960 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3961 len += len_next;
3962 count++;
3963 sg = sg_next_iter;
3964 } else {
3965 break;
3966 }
3967 }
3968
3969 new_dma_pkt->ctl = user_dma_pkt->ctl;
3970
3971 ctl = le32_to_cpu(user_dma_pkt->ctl);
3972 if (likely(dma_desc_cnt))
3973 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3974 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3975 new_dma_pkt->ctl = cpu_to_le32(ctl);
3976 new_dma_pkt->tsize = cpu_to_le32(len);
3977
3978 if (dir == DMA_TO_DEVICE) {
3979 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3980 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3981 } else {
3982 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3983 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3984 }
3985
3986 if (!user_memset)
3987 device_memory_addr += len;
3988 dma_desc_cnt++;
3989 new_dma_pkt++;
3990 }
3991
3992 if (!dma_desc_cnt) {
3993 dev_err(hdev->dev,
3994 "Error of 0 SG entries when patching DMA packet\n");
3995 return -EFAULT;
3996 }
3997
3998 /* Fix the last dma packet - wrcomp must be as user set it */
3999 new_dma_pkt--;
4000 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4001
4002 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4003
4004 return 0;
4005}
4006
4007static int gaudi_patch_cb(struct hl_device *hdev,
4008 struct hl_cs_parser *parser)
4009{
4010 u32 cb_parsed_length = 0;
4011 u32 cb_patched_cur_length = 0;
4012 int rc = 0;
4013
4014 /* user_cb_size is greater than 0, so the loop always executes */
4015 while (cb_parsed_length < parser->user_cb_size) {
4016 enum packet_id pkt_id;
4017 u16 pkt_size;
4018 u32 new_pkt_size = 0;
4019 struct gaudi_packet *user_pkt, *kernel_pkt;
4020
4021 user_pkt = (struct gaudi_packet *) (uintptr_t)
4022 (parser->user_cb->kernel_address + cb_parsed_length);
4023 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
4024 (parser->patched_cb->kernel_address +
4025 cb_patched_cur_length);
4026
4027 pkt_id = (enum packet_id) (
4028 (le64_to_cpu(user_pkt->header) &
4029 PACKET_HEADER_PACKET_ID_MASK) >>
4030 PACKET_HEADER_PACKET_ID_SHIFT);
4031
4032 if (!validate_packet_id(pkt_id)) {
4033 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4034 rc = -EINVAL;
4035 break;
4036 }
4037
4038 pkt_size = gaudi_packet_sizes[pkt_id];
4039 cb_parsed_length += pkt_size;
4040 if (cb_parsed_length > parser->user_cb_size) {
4041 dev_err(hdev->dev,
4042 "packet 0x%x is out of CB boundary\n", pkt_id);
4043 rc = -EINVAL;
4044 break;
4045 }
4046
4047 switch (pkt_id) {
4048 case PACKET_LIN_DMA:
4049 rc = gaudi_patch_dma_packet(hdev, parser,
4050 (struct packet_lin_dma *) user_pkt,
4051 (struct packet_lin_dma *) kernel_pkt,
4052 &new_pkt_size);
4053 cb_patched_cur_length += new_pkt_size;
4054 break;
4055
4056 case PACKET_MSG_PROT:
4057 dev_err(hdev->dev,
4058 "User not allowed to use MSG_PROT\n");
4059 rc = -EPERM;
4060 break;
4061
4062 case PACKET_CP_DMA:
4063 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4064 rc = -EPERM;
4065 break;
4066
4067 case PACKET_STOP:
4068 dev_err(hdev->dev, "User not allowed to use STOP\n");
4069 rc = -EPERM;
4070 break;
4071
4072 case PACKET_WREG_32:
4073 case PACKET_WREG_BULK:
4074 case PACKET_MSG_LONG:
4075 case PACKET_MSG_SHORT:
4076 case PACKET_REPEAT:
4077 case PACKET_FENCE:
4078 case PACKET_NOP:
4079 case PACKET_ARB_POINT:
4080 case PACKET_LOAD_AND_EXE:
4081 memcpy(kernel_pkt, user_pkt, pkt_size);
4082 cb_patched_cur_length += pkt_size;
4083 break;
4084
4085 default:
4086 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4087 pkt_id);
4088 rc = -EINVAL;
4089 break;
4090 }
4091
4092 if (rc)
4093 break;
4094 }
4095
4096 return rc;
4097}
4098
4099static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4100 struct hl_cs_parser *parser)
4101{
4102 u64 patched_cb_handle;
4103 u32 patched_cb_size;
4104 struct hl_cb *user_cb;
4105 int rc;
4106
4107 /*
4108 * The new CB should have space at the end for two MSG_PROT packets:
4109 * 1. A packet that will act as a completion packet
4110 * 2. A packet that will generate MSI interrupt
4111 */
4112 parser->patched_cb_size = parser->user_cb_size +
4113 sizeof(struct packet_msg_prot) * 2;
4114
4115 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
4116 &patched_cb_handle, HL_KERNEL_ASID_ID, false);
4117
4118 if (rc) {
4119 dev_err(hdev->dev,
4120 "Failed to allocate patched CB for DMA CS %d\n",
4121 rc);
4122 return rc;
4123 }
4124
4125 patched_cb_handle >>= PAGE_SHIFT;
4126 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4127 (u32) patched_cb_handle);
4128 /* hl_cb_get should never fail here so use kernel WARN */
4129 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4130 (u32) patched_cb_handle);
4131 if (!parser->patched_cb) {
4132 rc = -EFAULT;
4133 goto out;
4134 }
4135
4136 /*
4137 * The check that parser->user_cb_size <= parser->user_cb->size was done
4138 * in validate_queue_index().
4139 */
4140 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4141 (void *) (uintptr_t) parser->user_cb->kernel_address,
4142 parser->user_cb_size);
4143
4144 patched_cb_size = parser->patched_cb_size;
4145
4146 /* Validate patched CB instead of user CB */
4147 user_cb = parser->user_cb;
4148 parser->user_cb = parser->patched_cb;
4149 rc = gaudi_validate_cb(hdev, parser, true);
4150 parser->user_cb = user_cb;
4151
4152 if (rc) {
4153 hl_cb_put(parser->patched_cb);
4154 goto out;
4155 }
4156
4157 if (patched_cb_size != parser->patched_cb_size) {
4158 dev_err(hdev->dev, "user CB size mismatch\n");
4159 hl_cb_put(parser->patched_cb);
4160 rc = -EINVAL;
4161 goto out;
4162 }
4163
4164out:
4165 /*
4166 * Always call cb destroy here because we still have 1 reference
4167 * to it by calling cb_get earlier. After the job is completed,
4168 * cb_put will release it, but here we want to remove it from the
4169 * idr
4170 */
4171 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4172 patched_cb_handle << PAGE_SHIFT);
4173
4174 return rc;
4175}
4176
4177static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4178 struct hl_cs_parser *parser)
4179{
4180 u64 patched_cb_handle;
4181 int rc;
4182
4183 rc = gaudi_validate_cb(hdev, parser, false);
4184
4185 if (rc)
4186 goto free_userptr;
4187
4188 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
4189 &patched_cb_handle, HL_KERNEL_ASID_ID, false);
4190 if (rc) {
4191 dev_err(hdev->dev,
4192 "Failed to allocate patched CB for DMA CS %d\n", rc);
4193 goto free_userptr;
4194 }
4195
4196 patched_cb_handle >>= PAGE_SHIFT;
4197 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4198 (u32) patched_cb_handle);
4199 /* hl_cb_get should never fail here so use kernel WARN */
4200 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4201 (u32) patched_cb_handle);
4202 if (!parser->patched_cb) {
4203 rc = -EFAULT;
4204 goto out;
4205 }
4206
4207 rc = gaudi_patch_cb(hdev, parser);
4208
4209 if (rc)
4210 hl_cb_put(parser->patched_cb);
4211
4212out:
4213 /*
4214 * Always call cb destroy here because we still have 1 reference
4215 * to it by calling cb_get earlier. After the job is completed,
4216 * cb_put will release it, but here we want to remove it from the
4217 * idr
4218 */
4219 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4220 patched_cb_handle << PAGE_SHIFT);
4221
4222free_userptr:
4223 if (rc)
4224 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4225 return rc;
4226}
4227
4228static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4229 struct hl_cs_parser *parser)
4230{
4231 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4232
4233 /* For internal queue jobs just check if CB address is valid */
4234 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4235 parser->user_cb_size,
4236 asic_prop->sram_user_base_address,
4237 asic_prop->sram_end_address))
4238 return 0;
4239
4240 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4241 parser->user_cb_size,
4242 asic_prop->dram_user_base_address,
4243 asic_prop->dram_end_address))
4244 return 0;
4245
4246 /* PMMU and HPMMU addresses are equal, check only one of them */
4247 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4248 parser->user_cb_size,
4249 asic_prop->pmmu.start_addr,
4250 asic_prop->pmmu.end_addr))
4251 return 0;
4252
4253 dev_err(hdev->dev,
4254 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4255 parser->user_cb, parser->user_cb_size);
4256
4257 return -EFAULT;
4258}
4259
4260static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4261{
4262 struct gaudi_device *gaudi = hdev->asic_specific;
4263
4264 if (parser->queue_type == QUEUE_TYPE_INT)
4265 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4266
4267 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4268 return gaudi_parse_cb_mmu(hdev, parser);
4269 else
4270 return gaudi_parse_cb_no_mmu(hdev, parser);
4271}
4272
4273static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4274 u64 kernel_address, u32 len,
4275 u64 cq_addr, u32 cq_val, u32 msi_vec,
4276 bool eb)
4277{
4278 struct gaudi_device *gaudi = hdev->asic_specific;
4279 struct packet_msg_prot *cq_pkt;
4280 u32 tmp;
4281
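	/*
	 * Two MSG_PROT packets are appended at the tail of the CB: the first
	 * writes cq_val to the completion queue address, the second writes 1
	 * to the MSI interrupt register (vector 0 unless multi-MSI mode is
	 * enabled) to raise the completion interrupt.
	 */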
4282 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4283 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4284
4285 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4286 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4287
4288 if (eb)
4289 tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT);
4290
4291 cq_pkt->ctl = cpu_to_le32(tmp);
4292 cq_pkt->value = cpu_to_le32(cq_val);
4293 cq_pkt->addr = cpu_to_le64(cq_addr);
4294
4295 cq_pkt++;
4296
4297 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4298 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4299 cq_pkt->ctl = cpu_to_le32(tmp);
4300 cq_pkt->value = cpu_to_le32(1);
4301
4302 if (!gaudi->multi_msi_mode)
4303 msi_vec = 0;
4304
4305 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4306}
4307
4308static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4309{
4310 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4311}
4312
4313static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4314 u32 size, u64 val)
4315{
4316 struct packet_lin_dma *lin_dma_pkt;
4317 struct hl_cs_job *job;
4318 u32 cb_size, ctl, err_cause;
4319 struct hl_cb *cb;
4320 int rc;
4321
4322 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4323 if (!cb)
4324 return -EFAULT;
4325
4326 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4327 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4328 cb_size = sizeof(*lin_dma_pkt);
4329
4330 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4331 (1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4332 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
4333 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
4334 (1 << GAUDI_PKT_CTL_MB_SHIFT));
4335 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4336 lin_dma_pkt->src_addr = cpu_to_le64(val);
4337 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4338 lin_dma_pkt->tsize = cpu_to_le32(size);
4339
4340 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4341 if (!job) {
4342 dev_err(hdev->dev, "Failed to allocate a new job\n");
4343 rc = -ENOMEM;
4344 goto release_cb;
4345 }
4346
4347 /* Verify DMA is OK */
4348 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4349 if (err_cause && !hdev->init_done) {
4350 dev_dbg(hdev->dev,
4351 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4352 err_cause);
4353 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4354 }
4355
4356 job->id = 0;
4357 job->user_cb = cb;
4358 job->user_cb->cs_cnt++;
4359 job->user_cb_size = cb_size;
4360 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4361 job->patched_cb = job->user_cb;
4362 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4363
4364 hl_debugfs_add_job(hdev, job);
4365
4366 rc = gaudi_send_job_on_qman0(hdev, job);
4367 hl_debugfs_remove_job(hdev, job);
4368 kfree(job);
4369 cb->cs_cnt--;
4370
4371 /* Verify DMA is OK */
4372 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4373 if (err_cause) {
4374 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4375 rc = -EIO;
4376 if (!hdev->init_done) {
4377 dev_dbg(hdev->dev,
4378 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4379 err_cause);
4380 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4381 }
4382 }
4383
4384release_cb:
4385 hl_cb_put(cb);
4386 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4387
4388 return rc;
4389}
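/*
 * A minimal usage sketch, as done by gaudi_context_switch() and
 * gaudi_mmu_clear_pgt_range() below:
 *
 *	rc = gaudi_memset_device_memory(hdev, prop->sram_user_base_address,
 *					size, 0x7777777777777777ull);
 *
 * The value shown here is the pattern used for the SRAM clear; any
 * device-visible address range can be filled the same way.
 */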
4390
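/*
 * Clear the sync-manager state a user context may have left behind: all SOB
 * and monitor status registers in the E_N/E_S/W_N blocks, and in the W_S
 * block only from GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT/MONITOR onward,
 * leaving the lower, non-user entries untouched.
 */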
4391static void gaudi_restore_sm_registers(struct hl_device *hdev)
4392{
4393 int i;
4394
4395 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4396 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4397 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4398 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4399 }
4400
4401 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4402 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4403 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4404 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4405 }
4406
4407 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4408
4409 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4410 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4411
4412 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4413
4414 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4415 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4416}
4417
4418static void gaudi_restore_dma_registers(struct hl_device *hdev)
4419{
4420 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4421 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4422 int i;
4423
4424 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4425 u64 sob_addr = CFG_BASE +
4426 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4427 (i * sob_delta);
4428 u32 dma_offset = i * DMA_CORE_OFFSET;
4429
4430 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4431 lower_32_bits(sob_addr));
4432 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4433 upper_32_bits(sob_addr));
4434 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4435
4436 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4437 * modified by the user for SRAM reduction
4438 */
4439 if (i > 1)
4440 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4441 0x00000001);
4442 }
4443}
4444
4445static void gaudi_restore_qm_registers(struct hl_device *hdev)
4446{
4447 u32 qman_offset;
4448 int i;
4449
4450 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4451 qman_offset = i * DMA_QMAN_OFFSET;
4452 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4453 }
4454
4455 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4456 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4457 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4458 }
4459
4460 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4461 qman_offset = i * TPC_QMAN_OFFSET;
4462 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4463 }
4464}
4465
4466static void gaudi_restore_user_registers(struct hl_device *hdev)
4467{
4468 gaudi_restore_sm_registers(hdev);
4469 gaudi_restore_dma_registers(hdev);
4470 gaudi_restore_qm_registers(hdev);
4471}
4472
4473static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4474{
4475 struct asic_fixed_properties *prop = &hdev->asic_prop;
4476 u64 addr = prop->sram_user_base_address;
4477 u32 size = hdev->pldm ? 0x10000 :
4478 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4479 u64 val = 0x7777777777777777ull;
4480 int rc;
4481
4482 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4483 if (rc) {
4484 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4485 return rc;
4486 }
4487
4488 gaudi_mmu_prepare(hdev, asid);
4489
4490 gaudi_restore_user_registers(hdev);
4491
4492 return 0;
4493}
4494
4495static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4496{
4497 struct asic_fixed_properties *prop = &hdev->asic_prop;
4498 struct gaudi_device *gaudi = hdev->asic_specific;
4499 u64 addr = prop->mmu_pgt_addr;
4500 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4501
4502 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4503 return 0;
4504
4505 return gaudi_memset_device_memory(hdev, addr, size, 0);
4506}
4507
4508static void gaudi_restore_phase_topology(struct hl_device *hdev)
4509{
4510
4511}
4512
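/*
 * The debugfs accessors below decode the device address the same way:
 * configuration space goes through RREG32/WREG32 (rejected while clock
 * gating is enabled), SRAM through its PCI BAR, HBM through the sliding HBM
 * BAR window (moved with gaudi_set_hbm_bar_base() and restored afterwards),
 * and host physical addresses are dereferenced directly when no IOMMU is
 * present.
 */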
4513static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4514{
4515 struct asic_fixed_properties *prop = &hdev->asic_prop;
4516 struct gaudi_device *gaudi = hdev->asic_specific;
4517 u64 hbm_bar_addr;
4518 int rc = 0;
4519
4520 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4521
4522 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4523 (hdev->clock_gating_mask &
4524 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4525
4526 dev_err_ratelimited(hdev->dev,
4527 "Can't read register - clock gating is enabled!\n");
4528 rc = -EFAULT;
4529 } else {
4530 *val = RREG32(addr - CFG_BASE);
4531 }
4532
4533 } else if ((addr >= SRAM_BASE_ADDR) &&
4534 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4535 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4536 (addr - SRAM_BASE_ADDR));
4537 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4538 u64 bar_base_addr = DRAM_PHYS_BASE +
4539 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4540
4541 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4542 if (hbm_bar_addr != U64_MAX) {
4543 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4544 (addr - bar_base_addr));
4545
4546 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4547 hbm_bar_addr);
4548 }
4549 if (hbm_bar_addr == U64_MAX)
4550 rc = -EIO;
4551 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4552 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4553 } else {
4554 rc = -EFAULT;
4555 }
4556
4557 return rc;
4558}
4559
4560static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4561{
4562 struct asic_fixed_properties *prop = &hdev->asic_prop;
4563 struct gaudi_device *gaudi = hdev->asic_specific;
4564 u64 hbm_bar_addr;
4565 int rc = 0;
4566
4567 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4568
4569 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4570 (hdev->clock_gating_mask &
4571 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4572
4573 dev_err_ratelimited(hdev->dev,
4574 "Can't write register - clock gating is enabled!\n");
4575 rc = -EFAULT;
4576 } else {
4577 WREG32(addr - CFG_BASE, val);
4578 }
4579
4580 } else if ((addr >= SRAM_BASE_ADDR) &&
4581 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4582 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4583 (addr - SRAM_BASE_ADDR));
4584 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4585 u64 bar_base_addr = DRAM_PHYS_BASE +
4586 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4587
4588 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4589 if (hbm_bar_addr != U64_MAX) {
4590 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4591 (addr - bar_base_addr));
4592
4593 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4594 hbm_bar_addr);
4595 }
4596 if (hbm_bar_addr == U64_MAX)
4597 rc = -EIO;
4598 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4599 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4600 } else {
4601 rc = -EFAULT;
4602 }
4603
4604 return rc;
4605}
4606
4607static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4608{
4609 struct asic_fixed_properties *prop = &hdev->asic_prop;
4610 struct gaudi_device *gaudi = hdev->asic_specific;
4611 u64 hbm_bar_addr;
4612 int rc = 0;
4613
4614 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4615
4616 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4617 (hdev->clock_gating_mask &
4618 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4619
4620 dev_err_ratelimited(hdev->dev,
4621 "Can't read register - clock gating is enabled!\n");
4622 rc = -EFAULT;
4623 } else {
4624 u32 val_l = RREG32(addr - CFG_BASE);
4625 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4626
4627 *val = (((u64) val_h) << 32) | val_l;
4628 }
4629
4630 } else if ((addr >= SRAM_BASE_ADDR) &&
4631 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4632 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4633 (addr - SRAM_BASE_ADDR));
4634 } else if (addr <=
4635 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4636 u64 bar_base_addr = DRAM_PHYS_BASE +
4637 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4638
4639 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4640 if (hbm_bar_addr != U64_MAX) {
4641 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4642 (addr - bar_base_addr));
4643
4644 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4645 hbm_bar_addr);
4646 }
4647 if (hbm_bar_addr == U64_MAX)
4648 rc = -EIO;
4649 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4650 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4651 } else {
4652 rc = -EFAULT;
4653 }
4654
4655 return rc;
4656}
4657
4658static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4659{
4660 struct asic_fixed_properties *prop = &hdev->asic_prop;
4661 struct gaudi_device *gaudi = hdev->asic_specific;
4662 u64 hbm_bar_addr;
4663 int rc = 0;
4664
4665 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4666
4667 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4668 (hdev->clock_gating_mask &
4669 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4670
4671 dev_err_ratelimited(hdev->dev,
4672 "Can't write register - clock gating is enabled!\n");
4673 rc = -EFAULT;
4674 } else {
4675 WREG32(addr - CFG_BASE, lower_32_bits(val));
4676 WREG32(addr + sizeof(u32) - CFG_BASE,
4677 upper_32_bits(val));
4678 }
4679
4680 } else if ((addr >= SRAM_BASE_ADDR) &&
4681 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4682 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4683 (addr - SRAM_BASE_ADDR));
4684 } else if (addr <=
4685 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4686 u64 bar_base_addr = DRAM_PHYS_BASE +
4687 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4688
4689 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4690 if (hbm_bar_addr != U64_MAX) {
4691 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4692 (addr - bar_base_addr));
4693
4694 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4695 hbm_bar_addr);
4696 }
4697 if (hbm_bar_addr == U64_MAX)
4698 rc = -EIO;
4699 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4700 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4701 } else {
4702 rc = -EFAULT;
4703 }
4704
4705 return rc;
4706}
4707
4708static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4709{
4710 struct gaudi_device *gaudi = hdev->asic_specific;
4711
4712 if (hdev->hard_reset_pending)
4713 return U64_MAX;
4714
4715 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4716 (addr - gaudi->hbm_bar_cur_addr));
4717}
4718
4719static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4720{
4721 struct gaudi_device *gaudi = hdev->asic_specific;
4722
4723 if (hdev->hard_reset_pending)
4724 return;
4725
4726 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4727 (addr - gaudi->hbm_bar_cur_addr));
4728}
4729
4730static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4731{
4732 /* mask to zero the MMBP and ASID bits */
4733 WREG32_AND(reg, ~0x7FF);
4734 WREG32_OR(reg, asid);
4735}
4736
4737static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4738{
4739 struct gaudi_device *gaudi = hdev->asic_specific;
4740
4741 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4742 return;
4743
4744 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4745 WARN(1, "asid %u is too big\n", asid);
4746 return;
4747 }
4748
4749 mutex_lock(&gaudi->clk_gate_mutex);
4750
4751 hdev->asic_funcs->disable_clock_gating(hdev);
4752
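	/*
	 * Stamp the context's ASID (and clear the MMU bypass bit) in the
	 * NON_SECURE_PROPS / ARUSER / AWUSER registers of every initiator:
	 * all DMA QMANs and cores, all TPC QMANs and CFG blocks, the MME
	 * QMANs, SBAB and ACC_WBC units, and the PSOC trace unit.
	 */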
4753 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4754 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4755 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4756 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4757 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4758
4759 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4760 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4761 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4762 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4763 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4764
4765 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4766 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4767 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4768 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4769 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4770
4771 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4772 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4773 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4774 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4775 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4776
4777 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4778 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4779 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4780 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4781 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4782
4783 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4784 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4785 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4786 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4787 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4788
4789 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4790 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4791 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4792 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4793 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4794
4795 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4796 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4797 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4798 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4799 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4800
4801 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4802 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4803 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4804 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4805 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4806 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4807 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4808 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4809
4810 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4811 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4812 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4813 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4814 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4815 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4816 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4817
4818 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4819 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4820 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4821 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4822 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4823 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4824 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4825
4826 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4827 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4828 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4829 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4830 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4831 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4832 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4833
4834 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4835 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4836 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4837 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4838 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4839 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4840 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4841
4842 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4843 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4844 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4845 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4846 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4847 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4848 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4849
4850 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4851 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4852 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4853 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4854 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4855 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4856 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4857
4858 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4859 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4860 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4861 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4862 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4863 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4864 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4865
4866 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4867 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4868 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4869 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4870 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4871 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4872 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4873
4874 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4875 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4876 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4877 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4878 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4879 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4880 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4881 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4882 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4883 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4884
4885 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4886 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4887 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4888 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4889 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4890 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4891 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4892 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4893 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4894 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4895 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4896 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4897
4898 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4899 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4900
4901 hdev->asic_funcs->set_clock_gating(hdev);
4902
4903 mutex_unlock(&gaudi->clk_gate_mutex);
4904}
4905
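/*
 * Send a driver-generated CB on DMA channel 0's QMAN: a MSG_PROT fence
 * packet at the end of the patched CB writes GAUDI_QMAN0_FENCE_VAL to a
 * scratch buffer, the DMA core PROT bit is raised for the duration of the
 * job, and the driver polls the scratch buffer until the fence value
 * arrives or the timeout expires.
 */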
4906static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4907 struct hl_cs_job *job)
4908{
4909 struct packet_msg_prot *fence_pkt;
4910 u32 *fence_ptr;
4911 dma_addr_t fence_dma_addr;
4912 struct hl_cb *cb;
4913 u32 tmp, timeout, dma_offset;
4914 int rc;
4915
4916 if (hdev->pldm)
4917 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4918 else
4919 timeout = HL_DEVICE_TIMEOUT_USEC;
4920
4921 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4922 dev_err_ratelimited(hdev->dev,
4923 "Can't send driver job on QMAN0 because the device is not idle\n");
4924 return -EBUSY;
4925 }
4926
4927 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4928 &fence_dma_addr);
4929 if (!fence_ptr) {
4930 dev_err(hdev->dev,
4931 "Failed to allocate fence memory for QMAN0\n");
4932 return -ENOMEM;
4933 }
4934
4935 cb = job->patched_cb;
4936
4937 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4938 job->job_cb_size - sizeof(struct packet_msg_prot));
4939
4940 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4941 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
4942 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4943 fence_pkt->ctl = cpu_to_le32(tmp);
4944 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4945 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4946
4947 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4948
4949 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4950
4951 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4952 job->job_cb_size, cb->bus_address);
4953 if (rc) {
4954 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4955 goto free_fence_ptr;
4956 }
4957
4958 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4959 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4960 timeout, true);
4961
4962 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4963
4964 if (rc == -ETIMEDOUT) {
4965 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4966 goto free_fence_ptr;
4967 }
4968
4969free_fence_ptr:
4970 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4971 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4972
4973 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4974 fence_dma_addr);
4975 return rc;
4976}
4977
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004978static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4979{
Ofir Bittonebd8d122020-05-10 13:41:28 +03004980 if (event_type >= GAUDI_EVENT_SIZE)
4981 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004982
Ofir Bittonebd8d122020-05-10 13:41:28 +03004983 if (!gaudi_irq_map_table[event_type].valid)
4984 goto event_not_supported;
4985
 4986	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
4987
4988 return;
4989
4990event_not_supported:
4991 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004992}
4993
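/*
 * Each DMA_IF location in the RAZWI initiator coordinates is shared by two
 * DMA cores (e.g. W_S by DMA0 and DMA2), so the coordinates alone cannot
 * identify the offender. Read both cores' ERR_CAUSE registers and use the
 * HBW read/write error bit to disambiguate; if neither or both are set,
 * report both candidates.
 */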
4994static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
4995 u32 x_y, bool is_write)
4996{
4997 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
4998
4999 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5000 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5001
5002 switch (x_y) {
5003 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5004 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5005 dma_id[0] = 0;
5006 dma_id[1] = 2;
5007 break;
5008 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5009 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5010 dma_id[0] = 1;
5011 dma_id[1] = 3;
5012 break;
5013 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5014 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5015 dma_id[0] = 4;
5016 dma_id[1] = 6;
5017 break;
5018 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5019 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5020 dma_id[0] = 5;
5021 dma_id[1] = 7;
5022 break;
5023 default:
5024 goto unknown_initiator;
5025 }
5026
5027 for (i = 0 ; i < 2 ; i++) {
5028 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5029 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5030 }
5031
5032 switch (x_y) {
5033 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5034 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5035 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5036 return "DMA0";
5037 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5038 return "DMA2";
5039 else
5040 return "DMA0 or DMA2";
5041 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5042 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5043 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5044 return "DMA1";
5045 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5046 return "DMA3";
5047 else
5048 return "DMA1 or DMA3";
5049 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5050 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5051 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5052 return "DMA4";
5053 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5054 return "DMA6";
5055 else
5056 return "DMA4 or DMA6";
5057 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5058 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5059 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5060 return "DMA5";
5061 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5062 return "DMA7";
5063 else
5064 return "DMA5 or DMA7";
5065 }
5066
5067unknown_initiator:
5068 return "unknown initiator";
5069}
5070
5071static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5072 bool is_write)
5073{
5074 u32 val, x_y, axi_id;
5075
5076 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5077 RREG32(mmMMU_UP_RAZWI_READ_ID);
5078 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5079 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5080 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5081 RAZWI_INITIATOR_AXI_ID_SHIFT);
5082
5083 switch (x_y) {
5084 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5085 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5086 return "TPC0";
5087 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5088 return "NIC0";
5089 break;
5090 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5091 return "TPC1";
5092 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5093 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5094 return "MME0";
5095 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5096 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5097 return "MME1";
5098 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5099 return "TPC2";
5100 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5101 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5102 return "TPC3";
5103 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5104 return "PCI";
5105 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5106 return "CPU";
5107 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5108 return "PSOC";
5109 break;
5110 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5111 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5112 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5113 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5114 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5115 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5116 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5117 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5118 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5119 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5120 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5121 return "TPC4";
5122 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5123 return "NIC1";
5124 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5125 return "NIC2";
5126 break;
5127 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5128 return "TPC5";
5129 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5130 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5131 return "MME2";
5132 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5133 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5134 return "MME3";
5135 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5136 return "TPC6";
5137 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5138 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5139 return "TPC7";
5140 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5141 return "NIC4";
5142 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5143 return "NIC5";
5144 break;
5145 default:
5146 break;
5147 }
5148
5149 dev_err(hdev->dev,
5150 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5151 val,
5152 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5153 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5154 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5155 RAZWI_INITIATOR_AXI_ID_MASK);
5156
5157 return "unknown initiator";
5158}
5159
5160static void gaudi_print_razwi_info(struct hl_device *hdev)
5161{
5162 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5163 dev_err_ratelimited(hdev->dev,
5164 "RAZWI event caused by illegal write of %s\n",
5165 gaudi_get_razwi_initiator_name(hdev, true));
5166 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5167 }
5168
5169 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5170 dev_err_ratelimited(hdev->dev,
5171 "RAZWI event caused by illegal read of %s\n",
5172 gaudi_get_razwi_initiator_name(hdev, false));
5173 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5174 }
5175}
5176
5177static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5178{
5179 struct gaudi_device *gaudi = hdev->asic_specific;
5180 u64 addr;
5181 u32 val;
5182
5183 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5184 return;
5185
5186 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5187 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5188 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5189 addr <<= 32;
5190 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5191
5192 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5193 addr);
5194
5195 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5196 }
5197
5198 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5199 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5200 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5201 addr <<= 32;
5202 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5203
5204 dev_err_ratelimited(hdev->dev,
5205 "MMU access error on va 0x%llx\n", addr);
5206
5207 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5208 }
5209}
5210
5211/*
5212 * +-------------------+------------------------------------------------------+
5213 * | Configuration Reg | Description |
5214 * | Address | |
5215 * +-------------------+------------------------------------------------------+
5216 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5217 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5218 * | |0xF34 memory wrappers 63:32 |
5219 * | |0xF38 memory wrappers 95:64 |
5220 * | |0xF3C memory wrappers 127:96 |
5221 * +-------------------+------------------------------------------------------+
5222 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5223 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5224 * | |0xF44 memory wrappers 63:32 |
5225 * | |0xF48 memory wrappers 95:64 |
5226 * | |0xF4C memory wrappers 127:96 |
5227 * +-------------------+------------------------------------------------------+
5228 */
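/*
 * Find which memory wrapper raised the ECC interrupt by scanning the
 * single/double error indication registers described above, then select
 * that wrapper, read back the captured error address and syndrome, and
 * clear the indication.
 */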
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005229static int gaudi_extract_ecc_info(struct hl_device *hdev,
5230 struct ecc_info_extract_params *params, u64 *ecc_address,
5231 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005232{
5233 struct gaudi_device *gaudi = hdev->asic_specific;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005234 u32 i, num_mem_regs, reg, err_bit;
5235 u64 err_addr, err_word = 0;
5236 int rc = 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005237
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005238 num_mem_regs = params->num_memories / 32 +
5239 ((params->num_memories % 32) ? 1 : 0);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005240
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005241 if (params->block_address >= CFG_BASE)
5242 params->block_address -= CFG_BASE;
5243
5244 if (params->derr)
5245 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005246 else
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005247 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005248
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005249 if (params->disable_clock_gating) {
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005250 mutex_lock(&gaudi->clk_gate_mutex);
5251 hdev->asic_funcs->disable_clock_gating(hdev);
5252 }
5253
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005254 /* Set invalid wrapper index */
5255 *memory_wrapper_idx = 0xFF;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005256
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005257 /* Iterate through memory wrappers, a single bit must be set */
Dan Carpenterb0353542020-08-05 12:51:05 +03005258 for (i = 0 ; i < num_mem_regs ; i++) {
 5259		err_word = RREG32(err_addr + i * 4);
5261 if (err_word) {
5262 err_bit = __ffs(err_word);
5263 *memory_wrapper_idx = err_bit + (32 * i);
5264 break;
5265 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005266 }
5267
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005268 if (*memory_wrapper_idx == 0xFF) {
5269 dev_err(hdev->dev, "ECC error information cannot be found\n");
5270 rc = -EINVAL;
5271 goto enable_clk_gate;
5272 }
5273
5274 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5275 *memory_wrapper_idx);
5276
5277 *ecc_address =
5278 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5279 *ecc_syndrom =
5280 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5281
5282 /* Clear error indication */
5283 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5284 if (params->derr)
5285 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5286 else
5287 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5288
5289 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5290
5291enable_clk_gate:
5292 if (params->disable_clock_gating) {
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005293 hdev->asic_funcs->set_clock_gating(hdev);
Greg Kroah-Hartman65a9bde62020-07-27 11:49:37 +02005294
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005295 mutex_unlock(&gaudi->clk_gate_mutex);
5296 }
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005297
5298 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005299}
5300
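/*
 * Generic QMAN error decoder: for each stream and the lower CP, read
 * GLBL_STS1, log every error-cause bit that is set and clear it
 * (write-1-to-clear), then decode and log the arbiter error causes from
 * ARB_ERR_CAUSE.
 */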
5301static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5302 const char *qm_name,
5303 u64 glbl_sts_addr,
5304 u64 arb_err_addr)
5305{
5306 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5307 char reg_desc[32];
5308
5309 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5310 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5311 glbl_sts_clr_val = 0;
5312 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5313
5314 if (!glbl_sts_val)
5315 continue;
5316
5317 if (i == QMAN_STREAMS)
5318 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5319 else
5320 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5321
5322 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5323 if (glbl_sts_val & BIT(j)) {
5324 dev_err_ratelimited(hdev->dev,
5325 "%s %s. err cause: %s\n",
5326 qm_name, reg_desc,
5327 gaudi_qman_error_cause[j]);
5328 glbl_sts_clr_val |= BIT(j);
5329 }
5330 }
5331
 5332		/* Write 1 to clear errors */
5333 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5334 }
5335
5336 arb_err_val = RREG32(arb_err_addr);
5337
5338 if (!arb_err_val)
5339 return;
5340
5341 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5342 if (arb_err_val & BIT(j)) {
5343 dev_err_ratelimited(hdev->dev,
5344 "%s ARB_ERR. err cause: %s\n",
5345 qm_name,
5346 gaudi_qman_arb_error_cause[j]);
5347 }
5348 }
5349}
5350
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005351static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5352 struct hl_eq_ecc_data *ecc_data)
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005353{
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005354 struct ecc_info_extract_params params;
5355 u64 ecc_address = 0, ecc_syndrom = 0;
5356 u8 index, memory_wrapper_idx = 0;
5357 bool extract_info_from_fw;
5358 int rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005359
5360 switch (event_type) {
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005361 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5362 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5363 extract_info_from_fw = true;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005364 break;
5365 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5366 index = event_type - GAUDI_EVENT_TPC0_SERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005367 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5368 params.num_memories = 90;
5369 params.derr = false;
5370 params.disable_clock_gating = true;
5371 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005372 break;
5373 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5374 index = event_type - GAUDI_EVENT_TPC0_DERR;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005375 params.block_address =
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005376 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005377 params.num_memories = 90;
5378 params.derr = true;
5379 params.disable_clock_gating = true;
5380 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005381 break;
5382 case GAUDI_EVENT_MME0_ACC_SERR:
5383 case GAUDI_EVENT_MME1_ACC_SERR:
5384 case GAUDI_EVENT_MME2_ACC_SERR:
5385 case GAUDI_EVENT_MME3_ACC_SERR:
5386 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005387 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5388 params.num_memories = 128;
5389 params.derr = false;
5390 params.disable_clock_gating = true;
5391 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005392 break;
5393 case GAUDI_EVENT_MME0_ACC_DERR:
5394 case GAUDI_EVENT_MME1_ACC_DERR:
5395 case GAUDI_EVENT_MME2_ACC_DERR:
5396 case GAUDI_EVENT_MME3_ACC_DERR:
5397 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005398 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5399 params.num_memories = 128;
5400 params.derr = true;
5401 params.disable_clock_gating = true;
5402 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005403 break;
5404 case GAUDI_EVENT_MME0_SBAB_SERR:
5405 case GAUDI_EVENT_MME1_SBAB_SERR:
5406 case GAUDI_EVENT_MME2_SBAB_SERR:
5407 case GAUDI_EVENT_MME3_SBAB_SERR:
5408 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005409 params.block_address =
5410 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5411 params.num_memories = 33;
5412 params.derr = false;
5413 params.disable_clock_gating = true;
5414 extract_info_from_fw = false;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005415 break;
5416 case GAUDI_EVENT_MME0_SBAB_DERR:
5417 case GAUDI_EVENT_MME1_SBAB_DERR:
5418 case GAUDI_EVENT_MME2_SBAB_DERR:
5419 case GAUDI_EVENT_MME3_SBAB_DERR:
5420 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005421 params.block_address =
5422 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5423 params.num_memories = 33;
5424 params.derr = true;
 5425		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
 5426	default:
5427 return;
5428 }
5429
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005430 if (extract_info_from_fw) {
5431 ecc_address = le64_to_cpu(ecc_data->ecc_address);
5432 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5433 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5434 } else {
5435 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5436 &ecc_syndrom, &memory_wrapper_idx);
5437 if (rc)
5438 return;
5439 }
5440
5441 dev_err(hdev->dev,
5442 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
5443 ecc_address, ecc_syndrom, memory_wrapper_idx);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005444}
5445
5446static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5447{
5448 u64 glbl_sts_addr, arb_err_addr;
5449 u8 index;
5450 char desc[32];
5451
5452 switch (event_type) {
5453 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5454 index = event_type - GAUDI_EVENT_TPC0_QM;
5455 glbl_sts_addr =
5456 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5457 arb_err_addr =
5458 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5459 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5460 break;
5461 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5462 index = event_type - GAUDI_EVENT_MME0_QM;
5463 glbl_sts_addr =
5464 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5465 arb_err_addr =
5466 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5467 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5468 break;
5469 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5470 index = event_type - GAUDI_EVENT_DMA0_QM;
5471 glbl_sts_addr =
5472 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5473 arb_err_addr =
5474 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5475 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5476 break;
5477 default:
5478 return;
5479 }
5480
5481 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5482}
5483
5484static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5485 bool razwi)
5486{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005487 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005488
5489 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5490 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5491 event_type, desc);
5492
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005493 if (razwi) {
5494 gaudi_print_razwi_info(hdev);
5495 gaudi_print_mmu_error_info(hdev);
5496 }
5497}
5498
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005499static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5500{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005501 struct gaudi_device *gaudi = hdev->asic_specific;
5502
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005503 /* Unmask all IRQs since some could have been received
5504 * during the soft reset
5505 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03005506 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005507}
5508
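/*
 * Scan one HBM device: for each channel (two pseudo-channels per
 * iteration), decode and log the pending parity/ECC interrupt bits
 * (WR_PAR, RD_PAR, CA_PAR, SERR, DERR) and the first-error address/type
 * counters, then clear the interrupts. Also check the MC SRAM SERR/DERR
 * summary registers. Returns non-zero if any error was found.
 */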
5509static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5510{
5511 int ch, err = 0;
5512 u32 base, val, val2;
5513
5514 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5515 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5516 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5517 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5518 if (val) {
5519 err = 1;
5520 dev_err(hdev->dev,
5521 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5522 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5523 (val >> 2) & 0x1, (val >> 3) & 0x1,
5524 (val >> 4) & 0x1);
5525
5526 val2 = RREG32(base + ch * 0x1000 + 0x060);
5527 dev_err(hdev->dev,
5528 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5529 device, ch * 2,
5530 RREG32(base + ch * 0x1000 + 0x064),
5531 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5532 (val2 & 0xFF0000) >> 16,
5533 (val2 & 0xFF000000) >> 24);
5534 }
5535
5536 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5537 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5538 if (val) {
5539 err = 1;
5540 dev_err(hdev->dev,
5541 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5542 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5543 (val >> 2) & 0x1, (val >> 3) & 0x1,
5544 (val >> 4) & 0x1);
5545
5546 val2 = RREG32(base + ch * 0x1000 + 0x070);
5547 dev_err(hdev->dev,
5548 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5549 device, ch * 2 + 1,
5550 RREG32(base + ch * 0x1000 + 0x074),
5551 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5552 (val2 & 0xFF0000) >> 16,
5553 (val2 & 0xFF000000) >> 24);
5554 }
5555
5556 /* Clear interrupts */
5557 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5558 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5559 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5560 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5561 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5562 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5563 }
5564
5565 val = RREG32(base + 0x8F30);
5566 val2 = RREG32(base + 0x8F34);
5567 if (val | val2) {
5568 err = 1;
5569 dev_err(hdev->dev,
5570 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5571 device, val, val2);
5572 }
5573 val = RREG32(base + 0x8F40);
5574 val2 = RREG32(base + 0x8F44);
5575 if (val | val2) {
5576 err = 1;
5577 dev_err(hdev->dev,
5578 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5579 device, val, val2);
5580 }
5581
5582 return err;
5583}
5584
5585static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5586{
5587 switch (hbm_event_type) {
5588 case GAUDI_EVENT_HBM0_SPI_0:
5589 case GAUDI_EVENT_HBM0_SPI_1:
5590 return 0;
5591 case GAUDI_EVENT_HBM1_SPI_0:
5592 case GAUDI_EVENT_HBM1_SPI_1:
5593 return 1;
5594 case GAUDI_EVENT_HBM2_SPI_0:
5595 case GAUDI_EVENT_HBM2_SPI_1:
5596 return 2;
5597 case GAUDI_EVENT_HBM3_SPI_0:
5598 case GAUDI_EVENT_HBM3_SPI_1:
5599 return 3;
5600 default:
5601 break;
5602 }
5603
5604 /* Should never happen */
5605 return 0;
5606}
5607
5608static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5609 char *interrupt_name)
5610{
5611 struct gaudi_device *gaudi = hdev->asic_specific;
5612 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5613 bool soft_reset_required = false;
5614
 5615	/* Accessing the TPC_INTR_CAUSE registers requires disabling clock
 5616	 * gating, which cannot be done from ArmCP, so the driver must read
 5617	 * and clear these interrupts itself.
5618 */
5619
5620 mutex_lock(&gaudi->clk_gate_mutex);
5621
5622 hdev->asic_funcs->disable_clock_gating(hdev);
5623
5624 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5625 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5626
5627 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5628 if (tpc_interrupts_cause & BIT(i)) {
5629 dev_err_ratelimited(hdev->dev,
5630 "TPC%d_%s interrupt cause: %s\n",
5631 tpc_id, interrupt_name,
5632 gaudi_tpc_interrupts_cause[i]);
5633 /* If this is QM error, we need to soft-reset */
5634 if (i == 15)
5635 soft_reset_required = true;
5636 }
5637
5638 /* Clear interrupts */
5639 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5640
Oded Gabbaye38bfd32020-07-03 20:46:12 +03005641 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005642
5643 mutex_unlock(&gaudi->clk_gate_mutex);
5644
5645 return soft_reset_required;
5646}
5647
5648static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5649{
5650 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5651}
5652
5653static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5654{
5655 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5656}
5657
5658static void gaudi_print_clk_change_info(struct hl_device *hdev,
5659 u16 event_type)
5660{
5661 switch (event_type) {
5662 case GAUDI_EVENT_FIX_POWER_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005663 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005664 dev_info_ratelimited(hdev->dev,
5665 "Clock throttling due to power consumption\n");
5666 break;
5667
5668 case GAUDI_EVENT_FIX_POWER_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005669 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005670 dev_info_ratelimited(hdev->dev,
5671 "Power envelop is safe, back to optimal clock\n");
5672 break;
5673
5674 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005675 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005676 dev_info_ratelimited(hdev->dev,
5677 "Clock throttling due to overheating\n");
5678 break;
5679
5680 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
Ofir Bitton0a068ad2020-07-21 10:49:51 +03005681 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005682 dev_info_ratelimited(hdev->dev,
5683 "Thermal envelop is safe, back to optimal clock\n");
5684 break;
5685
5686 default:
5687 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5688 event_type);
5689 break;
5690 }
5691}
5692
5693static void gaudi_handle_eqe(struct hl_device *hdev,
5694 struct hl_eq_entry *eq_entry)
5695{
5696 struct gaudi_device *gaudi = hdev->asic_specific;
5697 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5698 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5699 >> EQ_CTL_EVENT_TYPE_SHIFT);
5700 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03005701 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005702
5703 gaudi->events_stat[event_type]++;
5704 gaudi->events_stat_aggregate[event_type]++;
5705
5706 switch (event_type) {
5707 case GAUDI_EVENT_PCIE_CORE_DERR:
5708 case GAUDI_EVENT_PCIE_IF_DERR:
5709 case GAUDI_EVENT_PCIE_PHY_DERR:
5710 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5711 case GAUDI_EVENT_MME0_ACC_DERR:
5712 case GAUDI_EVENT_MME0_SBAB_DERR:
5713 case GAUDI_EVENT_MME1_ACC_DERR:
5714 case GAUDI_EVENT_MME1_SBAB_DERR:
5715 case GAUDI_EVENT_MME2_ACC_DERR:
5716 case GAUDI_EVENT_MME2_SBAB_DERR:
5717 case GAUDI_EVENT_MME3_ACC_DERR:
5718 case GAUDI_EVENT_MME3_SBAB_DERR:
5719 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5720 fallthrough;
5721 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5722 case GAUDI_EVENT_PSOC_MEM_DERR:
5723 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5724 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5725 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005726 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5727 case GAUDI_EVENT_MMU_DERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005728 gaudi_print_irq_info(hdev, event_type, true);
5729 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5730 if (hdev->hard_reset_on_fw_events)
5731 hl_device_reset(hdev, true, false);
5732 break;
5733
5734 case GAUDI_EVENT_GIC500:
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005735 case GAUDI_EVENT_AXI_ECC:
5736 case GAUDI_EVENT_L2_RAM_ECC:
5737 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5738 gaudi_print_irq_info(hdev, event_type, false);
5739 if (hdev->hard_reset_on_fw_events)
5740 hl_device_reset(hdev, true, false);
5741 break;
5742
5743 case GAUDI_EVENT_HBM0_SPI_0:
5744 case GAUDI_EVENT_HBM1_SPI_0:
5745 case GAUDI_EVENT_HBM2_SPI_0:
5746 case GAUDI_EVENT_HBM3_SPI_0:
5747 gaudi_print_irq_info(hdev, event_type, false);
5748 gaudi_hbm_read_interrupts(hdev,
5749 gaudi_hbm_event_to_dev(event_type));
5750 if (hdev->hard_reset_on_fw_events)
5751 hl_device_reset(hdev, true, false);
5752 break;
5753
5754 case GAUDI_EVENT_HBM0_SPI_1:
5755 case GAUDI_EVENT_HBM1_SPI_1:
5756 case GAUDI_EVENT_HBM2_SPI_1:
5757 case GAUDI_EVENT_HBM3_SPI_1:
5758 gaudi_print_irq_info(hdev, event_type, false);
5759 gaudi_hbm_read_interrupts(hdev,
5760 gaudi_hbm_event_to_dev(event_type));
5761 break;
5762
5763 case GAUDI_EVENT_TPC0_DEC:
5764 case GAUDI_EVENT_TPC1_DEC:
5765 case GAUDI_EVENT_TPC2_DEC:
5766 case GAUDI_EVENT_TPC3_DEC:
5767 case GAUDI_EVENT_TPC4_DEC:
5768 case GAUDI_EVENT_TPC5_DEC:
5769 case GAUDI_EVENT_TPC6_DEC:
5770 case GAUDI_EVENT_TPC7_DEC:
5771 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005772 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005773 tpc_dec_event_to_tpc_id(event_type),
5774 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03005775 if (reset_required) {
5776 dev_err(hdev->dev, "hard reset required due to %s\n",
5777 gaudi_irq_map_table[event_type].name);
5778
5779 if (hdev->hard_reset_on_fw_events)
5780 hl_device_reset(hdev, true, false);
5781 } else {
5782 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005783 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005784 break;
5785
5786 case GAUDI_EVENT_TPC0_KRN_ERR:
5787 case GAUDI_EVENT_TPC1_KRN_ERR:
5788 case GAUDI_EVENT_TPC2_KRN_ERR:
5789 case GAUDI_EVENT_TPC3_KRN_ERR:
5790 case GAUDI_EVENT_TPC4_KRN_ERR:
5791 case GAUDI_EVENT_TPC5_KRN_ERR:
5792 case GAUDI_EVENT_TPC6_KRN_ERR:
5793 case GAUDI_EVENT_TPC7_KRN_ERR:
5794 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005795 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005796 tpc_krn_event_to_tpc_id(event_type),
5797 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03005798 if (reset_required) {
5799 dev_err(hdev->dev, "hard reset required due to %s\n",
5800 gaudi_irq_map_table[event_type].name);
5801
5802 if (hdev->hard_reset_on_fw_events)
5803 hl_device_reset(hdev, true, false);
5804 } else {
5805 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005806 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005807 break;
5808
5809 case GAUDI_EVENT_PCIE_CORE_SERR:
5810 case GAUDI_EVENT_PCIE_IF_SERR:
5811 case GAUDI_EVENT_PCIE_PHY_SERR:
5812 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5813 case GAUDI_EVENT_MME0_ACC_SERR:
5814 case GAUDI_EVENT_MME0_SBAB_SERR:
5815 case GAUDI_EVENT_MME1_ACC_SERR:
5816 case GAUDI_EVENT_MME1_SBAB_SERR:
5817 case GAUDI_EVENT_MME2_ACC_SERR:
5818 case GAUDI_EVENT_MME2_SBAB_SERR:
5819 case GAUDI_EVENT_MME3_ACC_SERR:
5820 case GAUDI_EVENT_MME3_SBAB_SERR:
5821 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5822 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5823 case GAUDI_EVENT_PSOC_MEM_SERR:
5824 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5825 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5826 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5827 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5828 fallthrough;
5829 case GAUDI_EVENT_MMU_SERR:
Oded Gabbayfcc6a4e2020-05-17 08:20:35 +03005830 gaudi_print_irq_info(hdev, event_type, true);
5831 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5832 hl_fw_unmask_irq(hdev, event_type);
5833 break;
5834
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005835 case GAUDI_EVENT_PCIE_DEC:
5836 case GAUDI_EVENT_MME0_WBC_RSP:
5837 case GAUDI_EVENT_MME0_SBAB0_RSP:
5838 case GAUDI_EVENT_MME1_WBC_RSP:
5839 case GAUDI_EVENT_MME1_SBAB0_RSP:
5840 case GAUDI_EVENT_MME2_WBC_RSP:
5841 case GAUDI_EVENT_MME2_SBAB0_RSP:
5842 case GAUDI_EVENT_MME3_WBC_RSP:
5843 case GAUDI_EVENT_MME3_SBAB0_RSP:
5844 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5845 case GAUDI_EVENT_PSOC_AXI_DEC:
5846 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5847 case GAUDI_EVENT_MMU_PAGE_FAULT:
5848 case GAUDI_EVENT_MMU_WR_PERM:
5849 case GAUDI_EVENT_RAZWI_OR_ADC:
5850 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5851 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5852 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5853 fallthrough;
5854 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5855 gaudi_print_irq_info(hdev, event_type, true);
5856 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005857 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005858 break;
5859
5860 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5861 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005862 if (hdev->hard_reset_on_fw_events)
5863 hl_device_reset(hdev, true, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005864 break;
5865
5866 case GAUDI_EVENT_TPC0_BMON_SPMU:
5867 case GAUDI_EVENT_TPC1_BMON_SPMU:
5868 case GAUDI_EVENT_TPC2_BMON_SPMU:
5869 case GAUDI_EVENT_TPC3_BMON_SPMU:
5870 case GAUDI_EVENT_TPC4_BMON_SPMU:
5871 case GAUDI_EVENT_TPC5_BMON_SPMU:
5872 case GAUDI_EVENT_TPC6_BMON_SPMU:
5873 case GAUDI_EVENT_TPC7_BMON_SPMU:
5874 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5875 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005876 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005877 break;
5878
5879 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5880 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005881 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005882 break;
5883
5884 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5885 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5886 dev_err(hdev->dev,
5887 "Received high temp H/W interrupt %d (cause %d)\n",
5888 event_type, cause);
5889 break;
5890
5891 default:
5892 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5893 event_type);
5894 break;
5895 }
5896}
5897
5898static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5899 u32 *size)
5900{
5901 struct gaudi_device *gaudi = hdev->asic_specific;
5902
5903 if (aggregate) {
5904 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5905 return gaudi->events_stat_aggregate;
5906 }
5907
5908 *size = (u32) sizeof(gaudi->events_stat);
5909 return gaudi->events_stat;
5910}
5911
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005912static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005913 u32 flags)
5914{
5915 struct gaudi_device *gaudi = hdev->asic_specific;
5916 u32 status, timeout_usec;
5917 int rc;
5918
5919 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5920 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005921 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005922
5923 if (hdev->pldm)
5924 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5925 else
5926 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5927
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005928 mutex_lock(&hdev->mmu_cache_lock);
5929
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005930 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03005931 WREG32(mmSTLB_INV_PS, 3);
5932 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005933 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005934
5935 rc = hl_poll_timeout(
5936 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005937 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005938 status,
5939 !status,
5940 1000,
5941 timeout_usec);
5942
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005943 WREG32(mmSTLB_INV_SET, 0);
5944
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005945 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005946
5947 if (rc) {
5948 dev_err_ratelimited(hdev->dev,
5949 "MMU cache invalidation timeout\n");
5950 hl_device_reset(hdev, true, false);
5951 }
5952
5953 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005954}
5955
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005956static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005957 bool is_hard, u32 asid, u64 va, u64 size)
5958{
5959 struct gaudi_device *gaudi = hdev->asic_specific;
5960 u32 status, timeout_usec;
5961 u32 inv_data;
5962 u32 pi;
5963 int rc;
5964
5965 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5966 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005967 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005968
5969 mutex_lock(&hdev->mmu_cache_lock);
5970
5971 if (hdev->pldm)
5972 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5973 else
5974 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5975
5976 /*
5977 * TODO: currently invalidate entire L0 & L1 as in regular hard
5978 * invalidation. Need to apply invalidation of specific cache
5979 * lines with mask of ASID & VA & size.
 5980	 * Note that L1 will be flushed entirely in any case.
5981 */
5982
5983 /* L0 & L1 invalidation */
5984 inv_data = RREG32(mmSTLB_CACHE_INV);
5985 /* PI is 8 bit */
5986 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5987 WREG32(mmSTLB_CACHE_INV,
5988 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5989
5990 rc = hl_poll_timeout(
5991 hdev,
5992 mmSTLB_INV_CONSUMER_INDEX,
5993 status,
5994 status == pi,
5995 1000,
5996 timeout_usec);
5997
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005998 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005999
6000 if (rc) {
6001 dev_err_ratelimited(hdev->dev,
6002 "MMU cache invalidation timeout\n");
6003 hl_device_reset(hdev, true, false);
6004 }
6005
6006 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006007}
6008
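/*
 * Program the hop-0 page-table physical address for the given ASID:
 * write the ASID and the split physical address (bits 43:12 and 49:44),
 * trigger the update through MMU_BUSY and poll until the busy bit clears.
 */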
6009static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6010 u32 asid, u64 phys_addr)
6011{
6012 u32 status, timeout_usec;
6013 int rc;
6014
6015 if (hdev->pldm)
6016 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6017 else
6018 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6019
6020 WREG32(MMU_ASID, asid);
6021 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6022 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6023 WREG32(MMU_BUSY, 0x80000000);
6024
6025 rc = hl_poll_timeout(
6026 hdev,
6027 MMU_BUSY,
6028 status,
6029 !(status & 0x80000000),
6030 1000,
6031 timeout_usec);
6032
6033 if (rc) {
6034 dev_err(hdev->dev,
6035 "Timeout during MMU hop0 config of asid %d\n", asid);
6036 return rc;
6037 }
6038
6039 return 0;
6040}
6041
6042static int gaudi_send_heartbeat(struct hl_device *hdev)
6043{
6044 struct gaudi_device *gaudi = hdev->asic_specific;
6045
6046 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6047 return 0;
6048
6049 return hl_fw_send_heartbeat(hdev);
6050}
6051
6052static int gaudi_armcp_info_get(struct hl_device *hdev)
6053{
6054 struct gaudi_device *gaudi = hdev->asic_specific;
6055 struct asic_fixed_properties *prop = &hdev->asic_prop;
6056 int rc;
6057
6058 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6059 return 0;
6060
6061 rc = hl_fw_armcp_info_get(hdev);
6062 if (rc)
6063 return rc;
6064
6065 if (!strlen(prop->armcp_info.card_name))
6066 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6067 CARD_NAME_MAX_LEN);
6068
Oded Gabbay58361aa2020-08-08 23:34:47 +03006069 hdev->card_type = le32_to_cpu(hdev->asic_prop.armcp_info.card_type);
6070
6071 if (hdev->card_type == armcp_card_type_pci)
6072 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6073 else if (hdev->card_type == armcp_card_type_pmc)
6074 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6075
6076 hdev->max_power = prop->max_power_default;
6077
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006078 return 0;
6079}
6080
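/*
 * Check the idle state of all DMA, TPC and MME engines by reading their QM
 * and core/config status registers (clock gating is disabled around the
 * reads). Optionally prints a per-engine table to the given seq_file and
 * sets a bit in *mask for every busy engine.
 */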
6081static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
6082 struct seq_file *s)
6083{
6084 struct gaudi_device *gaudi = hdev->asic_specific;
6085 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6086 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6087 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6088 bool is_idle = true, is_eng_idle, is_slave;
6089 u64 offset;
6090 int i, dma_id;
6091
6092 mutex_lock(&gaudi->clk_gate_mutex);
6093
6094 hdev->asic_funcs->disable_clock_gating(hdev);
6095
6096 if (s)
6097 seq_puts(s,
6098 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6099 "--- ------- ------------ ---------- -------------\n");
6100
6101 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6102 dma_id = gaudi_dma_assignment[i];
6103 offset = dma_id * DMA_QMAN_OFFSET;
6104
6105 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6106 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6107 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6108 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6109 IS_DMA_IDLE(dma_core_sts0);
6110 is_idle &= is_eng_idle;
6111
6112 if (mask)
6113 *mask |= !is_eng_idle <<
6114 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6115 if (s)
6116 seq_printf(s, fmt, dma_id,
6117 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6118 qm_cgm_sts, dma_core_sts0);
6119 }
6120
6121 if (s)
6122 seq_puts(s,
6123 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6124 "--- ------- ------------ ---------- ----------\n");
6125
6126 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6127 offset = i * TPC_QMAN_OFFSET;
6128 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6129 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6130 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6131 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6132 IS_TPC_IDLE(tpc_cfg_sts);
6133 is_idle &= is_eng_idle;
6134
6135 if (mask)
6136 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
6137 if (s)
6138 seq_printf(s, fmt, i,
6139 is_eng_idle ? "Y" : "N",
6140 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6141 }
6142
6143 if (s)
6144 seq_puts(s,
6145 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6146 "--- ------- ------------ ---------- -----------\n");
6147
6148 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6149 offset = i * MME_QMAN_OFFSET;
6150 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6151 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6152
6153 /* MME 1 & 3 are slaves, no need to check their QMANs */
6154 is_slave = i % 2;
6155 if (!is_slave) {
6156 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6157 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6158 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6159 }
6160
6161 is_idle &= is_eng_idle;
6162
6163 if (mask)
6164 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
6165 if (s) {
6166 if (!is_slave)
6167 seq_printf(s, fmt, i,
6168 is_eng_idle ? "Y" : "N",
6169 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6170 else
6171 seq_printf(s, mme_slave_fmt, i,
6172 is_eng_idle ? "Y" : "N", "-",
6173 "-", mme_arch_sts);
6174 }
6175 }
6176
6177 if (s)
6178 seq_puts(s, "\n");
6179
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006180 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006181
6182 mutex_unlock(&gaudi->clk_gate_mutex);
6183
6184 return is_idle;
6185}
6186
6187static void gaudi_hw_queues_lock(struct hl_device *hdev)
6188 __acquires(&gaudi->hw_queues_lock)
6189{
6190 struct gaudi_device *gaudi = hdev->asic_specific;
6191
6192 spin_lock(&gaudi->hw_queues_lock);
6193}
6194
6195static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6196 __releases(&gaudi->hw_queues_lock)
6197{
6198 struct gaudi_device *gaudi = hdev->asic_specific;
6199
6200 spin_unlock(&gaudi->hw_queues_lock);
6201}
6202
6203static u32 gaudi_get_pci_id(struct hl_device *hdev)
6204{
6205 return hdev->pdev->device;
6206}
6207
6208static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6209 size_t max_size)
6210{
6211 struct gaudi_device *gaudi = hdev->asic_specific;
6212
6213 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6214 return 0;
6215
6216 return hl_fw_get_eeprom_data(hdev, data, max_size);
6217}
6218
6219/*
6220 * this function should be used only during initialization and/or after reset,
6221 * when there are no active users.
6222 */
6223static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6224 u32 tpc_id)
6225{
6226 struct gaudi_device *gaudi = hdev->asic_specific;
6227 u64 kernel_timeout;
6228 u32 status, offset;
6229 int rc;
6230
6231 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6232
6233 if (hdev->pldm)
6234 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6235 else
6236 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6237
6238 mutex_lock(&gaudi->clk_gate_mutex);
6239
6240 hdev->asic_funcs->disable_clock_gating(hdev);
6241
6242 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6243 lower_32_bits(tpc_kernel));
6244 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6245 upper_32_bits(tpc_kernel));
6246
6247 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6248 lower_32_bits(tpc_kernel));
6249 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6250 upper_32_bits(tpc_kernel));
6251 /* set a valid LUT pointer, content is of no significance */
6252 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6253 lower_32_bits(tpc_kernel));
6254 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6255 upper_32_bits(tpc_kernel));
6256
6257 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6258 lower_32_bits(CFG_BASE +
6259 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6260
6261 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6262 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6263 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6264 /* wait a bit for the engine to start executing */
6265 usleep_range(1000, 1500);
6266
6267 /* wait until engine has finished executing */
6268 rc = hl_poll_timeout(
6269 hdev,
6270 mmTPC0_CFG_STATUS + offset,
6271 status,
6272 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6273 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6274 1000,
6275 kernel_timeout);
6276
6277 if (rc) {
6278 dev_err(hdev->dev,
6279 "Timeout while waiting for TPC%d icache prefetch\n",
6280 tpc_id);
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006281 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006282 mutex_unlock(&gaudi->clk_gate_mutex);
6283 return -EIO;
6284 }
6285
6286 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6287 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6288
6289 /* wait a bit for the engine to start executing */
6290 usleep_range(1000, 1500);
6291
6292 /* wait until engine has finished executing */
6293 rc = hl_poll_timeout(
6294 hdev,
6295 mmTPC0_CFG_STATUS + offset,
6296 status,
6297 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6298 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6299 1000,
6300 kernel_timeout);
6301
6302 rc = hl_poll_timeout(
6303 hdev,
6304 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6305 status,
6306 (status == 0),
6307 1000,
6308 kernel_timeout);
6309
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006310 hdev->asic_funcs->set_clock_gating(hdev);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006311 mutex_unlock(&gaudi->clk_gate_mutex);
6312
6313 if (rc) {
6314 dev_err(hdev->dev,
6315 "Timeout while waiting for TPC%d kernel to execute\n",
6316 tpc_id);
6317 return -EIO;
6318 }
6319
6320 return 0;
6321}
6322
6323static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6324{
6325 return RREG32(mmHW_STATE);
6326}
6327
kernel test robotbb34bf72020-07-29 08:03:13 +08006328static int gaudi_ctx_init(struct hl_ctx *ctx)
Ofir Bittona04b7cd2020-07-13 13:36:55 +03006329{
6330 return 0;
6331}
6332
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006333static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6334{
6335 return gaudi_cq_assignment[cq_idx];
6336}
6337
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006338static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6339{
6340 return sizeof(struct packet_msg_short) +
6341 sizeof(struct packet_msg_prot) * 2;
6342}
6343
6344static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6345{
6346 return sizeof(struct packet_msg_short) * 4 +
6347 sizeof(struct packet_fence) +
6348 sizeof(struct packet_msg_prot) * 2;
6349}
6350
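/*
 * Build a signal CB: a single MSG_SHORT packet that adds 1 to the given
 * sync object in the W_S sync manager, with the engine, register and
 * message barriers set.
 */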
6351static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6352{
6353 struct hl_cb *cb = (struct hl_cb *) data;
6354 struct packet_msg_short *pkt;
6355 u32 value, ctl;
6356
6357 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6358 memset(pkt, 0, sizeof(*pkt));
6359
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006360 /* Inc by 1, Mode ADD */
6361 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6362 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006363
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006364 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6365 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6366 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6367 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6368 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6369 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6370 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006371
6372 pkt->value = cpu_to_le32(value);
6373 pkt->ctl = cpu_to_le32(ctl);
6374}
6375
6376static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6377 u16 addr)
6378{
6379 u32 ctl, pkt_size = sizeof(*pkt);
6380
6381 memset(pkt, 0, pkt_size);
6382
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006383 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6384 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6385 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6386 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6387 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6388 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006389
6390 pkt->value = cpu_to_le32(value);
6391 pkt->ctl = cpu_to_le32(ctl);
6392
6393 return pkt_size;
6394}
6395
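/*
 * Build the MSG_SHORT packet that arms a monitor: the value encodes the
 * SOB group (sob_id / 8), the target value, mode 0 (greater or equal) and
 * a mask in which only the bit of sob_id within its group of 8 is left
 * cleared, i.e. un-masked for the comparison.
 */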
6396static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6397 u16 sob_val, u16 addr)
6398{
6399 u32 ctl, value, pkt_size = sizeof(*pkt);
6400 u8 mask = ~(1 << (sob_id & 0x7));
6401
6402 memset(pkt, 0, pkt_size);
6403
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006404 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6405 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6406 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
 6407			0); /* GREATER OR EQUAL */
6408 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006409
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006410 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6411 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6412 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6413 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6414 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6415 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6416 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006417
6418 pkt->value = cpu_to_le32(value);
6419 pkt->ctl = cpu_to_le32(ctl);
6420
6421 return pkt_size;
6422}
6423
6424static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6425{
6426 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6427
6428 memset(pkt, 0, pkt_size);
6429
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006430 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6431 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6432 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006433
Ofir Bitton6c07bab2020-06-01 10:38:46 +03006434 ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6435 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6436 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6437 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006438
6439 pkt->cfg = cpu_to_le32(cfg);
6440 pkt->ctl = cpu_to_le32(ctl);
6441
6442 return pkt_size;
6443}
6444
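/*
 * Build a wait CB for the given queue: four MSG_SHORT packets configure a
 * monitor (payload address low/high pointing at the queue's
 * CP_FENCE2_RDATA register, payload data of 1, and the arm packet above),
 * followed by a FENCE packet (id 2, target 1) that makes the CP wait for
 * the monitor's payload write.
 */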
6445static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6446 u16 sob_val, u16 mon_id, u32 q_idx)
6447{
6448 struct hl_cb *cb = (struct hl_cb *) data;
6449 void *buf = (void *) (uintptr_t) cb->kernel_address;
6450 u64 monitor_base, fence_addr = 0;
6451 u32 size = 0;
6452 u16 msg_addr_offset;
6453
6454 switch (q_idx) {
6455 case GAUDI_QUEUE_ID_DMA_0_0:
6456 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6457 break;
6458 case GAUDI_QUEUE_ID_DMA_0_1:
6459 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6460 break;
6461 case GAUDI_QUEUE_ID_DMA_0_2:
6462 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6463 break;
6464 case GAUDI_QUEUE_ID_DMA_0_3:
6465 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6466 break;
6467 case GAUDI_QUEUE_ID_DMA_1_0:
6468 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6469 break;
6470 case GAUDI_QUEUE_ID_DMA_1_1:
6471 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6472 break;
6473 case GAUDI_QUEUE_ID_DMA_1_2:
6474 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6475 break;
6476 case GAUDI_QUEUE_ID_DMA_1_3:
6477 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6478 break;
6479 case GAUDI_QUEUE_ID_DMA_5_0:
6480 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6481 break;
6482 case GAUDI_QUEUE_ID_DMA_5_1:
6483 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6484 break;
6485 case GAUDI_QUEUE_ID_DMA_5_2:
6486 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6487 break;
6488 case GAUDI_QUEUE_ID_DMA_5_3:
6489 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6490 break;
6491 default:
6492 /* queue index should be valid here */
6493 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6494 q_idx);
6495 return;
6496 }
6497
6498 fence_addr += CFG_BASE;
6499
6500 /*
6501 * monitor_base should be the content of the base0 address registers,
6502 * so it will be added to the msg short offsets
6503 */
6504 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6505
6506 /* First monitor config packet: low address of the sync */
6507 msg_addr_offset =
6508 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6509 monitor_base;
6510
6511 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6512 msg_addr_offset);
6513
6514 /* Second monitor config packet: high address of the sync */
6515 msg_addr_offset =
6516 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6517 monitor_base;
6518
6519 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6520 msg_addr_offset);
6521
6522 /*
6523 * Third monitor config packet: the payload, i.e. what to write when the
6524 * sync triggers
6525 */
6526 msg_addr_offset =
6527 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6528 monitor_base;
6529
6530 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6531
6532 /* Fourth monitor config packet: bind the monitor to a sync object */
6533 msg_addr_offset =
6534 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6535 monitor_base;
6536 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6537 msg_addr_offset);
6538
6539 /* Fence packet */
6540 size += gaudi_add_fence_pkt(buf + size);
6541}
6542
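/* Clear the sync object's register and re-initialize its refcount */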
6543static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6544{
6545 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6546
6547 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6548 hw_sob->sob_id);
6549
6550 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6551 0);
6552
6553 kref_init(&hw_sob->kref);
6554}
6555
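/*
 * Select the DMA mask according to the value the firmware left in the
 * NON_RST_FLOPS_0 scratch register: HL_POWER9_HOST_MAGIC indicates a
 * POWER9 host that can use a full 64-bit mask, anything else falls back
 * to 48 bits.
 */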
6556static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6557{
6558 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6559 HL_POWER9_HOST_MAGIC) {
6560 hdev->power9_64bit_dma_enable = 1;
6561 hdev->dma_mask = 64;
6562 } else {
6563 hdev->power9_64bit_dma_enable = 0;
6564 hdev->dma_mask = 48;
6565 }
6566}
6567
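/*
 * Read the free-running PSOC timestamp counter: upper 32 bits from
 * CNTCVU, lower 32 bits from CNTCVL.
 */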
6568static u64 gaudi_get_device_time(struct hl_device *hdev)
6569{
6570 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6571
6572 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6573}
6574
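/* ASIC-specific callbacks exposed to the common habanalabs driver code */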
6575static const struct hl_asic_funcs gaudi_funcs = {
6576 .early_init = gaudi_early_init,
6577 .early_fini = gaudi_early_fini,
6578 .late_init = gaudi_late_init,
6579 .late_fini = gaudi_late_fini,
6580 .sw_init = gaudi_sw_init,
6581 .sw_fini = gaudi_sw_fini,
6582 .hw_init = gaudi_hw_init,
6583 .hw_fini = gaudi_hw_fini,
6584 .halt_engines = gaudi_halt_engines,
6585 .suspend = gaudi_suspend,
6586 .resume = gaudi_resume,
6587 .cb_mmap = gaudi_cb_mmap,
6588 .ring_doorbell = gaudi_ring_doorbell,
6589 .pqe_write = gaudi_pqe_write,
6590 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6591 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6592 .get_int_queue_base = gaudi_get_int_queue_base,
6593 .test_queues = gaudi_test_queues,
6594 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6595 .asic_dma_pool_free = gaudi_dma_pool_free,
6596 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6597 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6598 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6599 .cs_parser = gaudi_cs_parser,
6600 .asic_dma_map_sg = gaudi_dma_map_sg,
6601 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6602 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6603 .update_eq_ci = gaudi_update_eq_ci,
6604 .context_switch = gaudi_context_switch,
6605 .restore_phase_topology = gaudi_restore_phase_topology,
6606 .debugfs_read32 = gaudi_debugfs_read32,
6607 .debugfs_write32 = gaudi_debugfs_write32,
6608 .debugfs_read64 = gaudi_debugfs_read64,
6609 .debugfs_write64 = gaudi_debugfs_write64,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006610 .add_device_attr = gaudi_add_device_attr,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006611 .handle_eqe = gaudi_handle_eqe,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006612 .set_pll_profile = gaudi_set_pll_profile,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006613 .get_events_stat = gaudi_get_events_stat,
6614 .read_pte = gaudi_read_pte,
6615 .write_pte = gaudi_write_pte,
6616 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6617 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6618 .send_heartbeat = gaudi_send_heartbeat,
Oded Gabbaye38bfd32020-07-03 20:46:12 +03006619 .set_clock_gating = gaudi_set_clock_gating,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006620 .disable_clock_gating = gaudi_disable_clock_gating,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03006621 .debug_coresight = gaudi_debug_coresight,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006622 .is_device_idle = gaudi_is_device_idle,
6623 .soft_reset_late_init = gaudi_soft_reset_late_init,
6624 .hw_queues_lock = gaudi_hw_queues_lock,
6625 .hw_queues_unlock = gaudi_hw_queues_unlock,
6626 .get_pci_id = gaudi_get_pci_id,
6627 .get_eeprom_data = gaudi_get_eeprom_data,
6628 .send_cpu_message = gaudi_send_cpu_message,
6629 .get_hw_state = gaudi_get_hw_state,
6630 .pci_bars_map = gaudi_pci_bars_map,
6631 .set_dram_bar_base = gaudi_set_hbm_bar_base,
6632 .init_iatu = gaudi_init_iatu,
6633 .rreg = hl_rreg,
6634 .wreg = hl_wreg,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03006635 .halt_coresight = gaudi_halt_coresight,
Ofir Bittona04b7cd2020-07-13 13:36:55 +03006636 .ctx_init = gaudi_ctx_init,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006637 .get_clk_rate = gaudi_get_clk_rate,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006638 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6639 .read_device_fw_version = gaudi_read_device_fw_version,
6640 .load_firmware_to_device = gaudi_load_firmware_to_device,
6641 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006642 .get_signal_cb_size = gaudi_get_signal_cb_size,
6643 .get_wait_cb_size = gaudi_get_wait_cb_size,
6644 .gen_signal_cb = gaudi_gen_signal_cb,
6645 .gen_wait_cb = gaudi_gen_wait_cb,
6646 .reset_sob = gaudi_reset_sob,
6647 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6648 .get_device_time = gaudi_get_device_time
6649};
6650
6651/**
6652 * gaudi_set_asic_funcs - set GAUDI function pointers
6653 *
Lee Jonesf7d227c2020-07-01 09:58:42 +01006654 * @hdev: pointer to hl_device structure
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006655 *
6656 */
6657void gaudi_set_asic_funcs(struct hl_device *hdev)
6658{
6659 hdev->asic_funcs = &gaudi_funcs;
6660}
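
/*
 * Illustrative sketch only (not part of this file): once
 * gaudi_set_asic_funcs() has run, common habanalabs code is expected to
 * reach the Gaudi-specific implementations through the function table,
 * e.g. something along the lines of:
 *
 *	u64 now = hdev->asic_funcs->get_device_time(hdev);
 *
 * which dispatches to gaudi_get_device_time() on Gaudi devices.
 */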