// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_1.h"
#include "include/gaudi/gaudi_masks.h"
#include "include/gaudi/gaudi_fw_if.h"
#include "include/gaudi/gaudi_reg_map.h"
#include "include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/genalloc.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1,5 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
 *     PQ is secured and is located on the Host (HBM CON TPC3 bug)
 *     CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	1000		/* 1000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC	14000		/* 14s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

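/*
 * Map the logical DMA channels used by the driver to physical DMA engine
 * indices. Engines 0, 1 and 5 serve PCI (host) DMA, per the security scheme
 * described above; the remaining engines are dedicated to HBM traffic.
 */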
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = 0,
	[GAUDI_PCI_DMA_2] = 1,
	[GAUDI_PCI_DMA_3] = 5,
	[GAUDI_HBM_DMA_1] = 2,
	[GAUDI_HBM_DMA_2] = 3,
	[GAUDI_HBM_DMA_3] = 4,
	[GAUDI_HBM_DMA_4] = 6,
	[GAUDI_HBM_DMA_5] = 7
};

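/*
 * Each of the three PCI DMA QMANs (DMA 0, 1 and 5) exposes four streams, so
 * the twelve completion queues map to those external queues in order.
 */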
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
	[8] = GAUDI_QUEUE_ID_DMA_5_0,
	[9] = GAUDI_QUEUE_ID_DMA_5_1,
	[10] = GAUDI_QUEUE_ID_DMA_5_2,
	[11] = GAUDI_QUEUE_ID_DMA_5_3
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_armcp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);

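/*
 * Fill hdev->asic_prop with the Gaudi fixed (compile-time) properties:
 * per-queue H/W properties, DRAM/SRAM ranges, MMU layout and host VA ranges.
 */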
static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
		dev_err(hdev->dev,
			"Number of H/W queues must be smaller than %d\n",
			HL_MAX_QUEUES);
		return -EFAULT;
	}

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 1;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
		}
	}

	for (; i < HL_MAX_QUEUES; i++)
		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;

	/* PMMU and HPMMU are the same except for page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->max_power_default = MAX_POWER_DEFAULT;

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

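/*
 * Re-program inbound region 2 (BAR 4) so that the HBM window exposed through
 * the BAR covers @addr. Returns the previous window base, or U64_MAX if the
 * iATU programming failed.
 */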
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	int rc = 0;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	rc = hl_pci_iatu_write(hdev, 0x314,
			lower_32_bits(SPI_FLASH_BASE_ADDR));
	rc |= hl_pci_iatu_write(hdev, 0x318,
			upper_32_bits(SPI_FLASH_BASE_ADDR));
	rc |= hl_pci_iatu_write(hdev, 0x300, 0);
	/* Enable + Bar match + match enable */
	rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);

	if (rc)
		return -EIO;

	return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
				HOST_PHYS_BASE, HOST_PHYS_SIZE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = gaudi_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		return -ENODEV;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		return -ENODEV;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		return rc;

	return 0;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
	prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
	prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
	prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
}

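/*
 * Build a single LIN_DMA packet that copies the TPC kernel image from host
 * memory (@tpc_kernel_src_addr) to the SRAM user area, submit it as a kernel
 * job on QMAN0 (DMA 0), and then run the kernel on every TPC engine.
 */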
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
					cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
			(1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
			(1 << GAUDI_PKT_CTL_RB_SHIFT) |
			(1 << GAUDI_PKT_CTL_MB_SHIFT));

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	cb->cs_cnt--;

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc;

	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc) {
		dev_err(hdev->dev, "Firmware file %s was not found!\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu bytes of DMA memory for the TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->armcp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get armcp info\n");
		return rc;
	}

	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);

	gaudi_fetch_psoc_frequency(hdev);

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

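/*
 * Allocate the DMA memory area that the device CPU can access. Retry the
 * allocation a few times if needed so that the whole area shares the same
 * MSB extension bits (see the comment below about bits 49:39).
 */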
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, and bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						&dma_addr_arr[i],
						GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory is not identical across the whole range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
							q->pq_kernel_addr,
							q->pq_dma_addr);
	}
}

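/*
 * Allocate host memory for the PQs of all internal (on-device) queues. The PQ
 * size depends on the engine type behind the queue (HBM DMA, MME or TPC).
 */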
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
		case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

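/*
 * S/W init: allocate the gaudi-specific device structure, build the event-id
 * lookup table from gaudi_irq_map_table, and create the DMA pools and the CPU
 * accessible DMA memory region.
 */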
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->armcp_info_get = gaudi_armcp_info_get;

	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);
	mutex_init(&gaudi->clk_gate_mutex);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
				hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
				hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	mutex_destroy(&gaudi->clk_gate_mutex);

	kfree(gaudi);

	return 0;
}

static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_HANDLED;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}

static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_info(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}

static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = gaudi_pci_irq_vector(hdev, i, false);
		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
				&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(gaudi_pci_irq_vector(hdev, i, false),
				&hdev->completion_queue[i]);
	return rc;
}

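/*
 * Try to allocate the full set of MSI vectors. If fewer than
 * NUMBER_OF_INTERRUPTS vectors are granted, fall back to single MSI mode in
 * which one handler serves all completion queues and the event queue.
 */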
static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
					PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	if (rc < NUMBER_OF_INTERRUPTS) {
		gaudi->multi_msi_mode = false;
		rc = gaudi_enable_msi_single(hdev);
	} else {
		gaudi->multi_msi_mode = true;
		rc = gaudi_enable_msi_multi(hdev);
	}

	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	if (gaudi->multi_msi_mode) {
		for (i = 0 ; i < cq_cnt ; i++)
			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));

		synchronize_irq(gaudi_pci_irq_vector(hdev,
						GAUDI_EVENT_QUEUE_MSI_IDX,
						true));
	} else {
		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
	}
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);

	if (gaudi->multi_msi_mode) {
		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
						true);
		free_irq(irq, &hdev->event_queue);

		for (i = 0 ; i < cq_cnt ; i++) {
			irq = gaudi_pci_irq_vector(hdev, i, false);
			free_irq(irq, &hdev->completion_queue[i]);
		}
	} else {
		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	}

	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}

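/*
 * Enable the SRAM address scrambler in every NIF/SIF router and DMA_IF down
 * channel. This is done once and is skipped when scrambling is disabled
 * (hdev->sram_scrambler_enable) or was already enabled.
 */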
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	if (!hdev->sram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}

static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	if (!hdev->dram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}

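/*
 * Program the end-to-end (E2E) credit sizes for the HBM and PCI paths of
 * every SIF/NIF router and DMA_IF down channel, then enable E2E credits.
 * When HBM scrambling is disabled, the NL_HBM address-selection registers
 * are also reprogrammed to match.
 */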
static void gaudi_init_e2e(struct hl_device *hdev)
{
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	if (!hdev->dram_scrambler_enable) {
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
	}

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}

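/*
 * Program per-DMA_IF read/write credit counters towards both HBM channels
 * and enable credit accounting.
 */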
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}

1694static void gaudi_init_golden_registers(struct hl_device *hdev)
1695{
1696 u32 tpc_offset;
1697 int tpc_id, i;
1698
1699 gaudi_init_e2e(hdev);
1700
1701 gaudi_init_hbm_cred(hdev);
1702
1703	gaudi_disable_clock_gating(hdev);
1704
1705 for (tpc_id = 0, tpc_offset = 0;
1706 tpc_id < TPC_NUMBER_OF_ENGINES;
1707 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1708 /* Mask all arithmetic interrupts from TPC */
1709 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1710 /* Set 16 cache lines */
1711 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1712 ICACHE_FETCH_LINE_NUM, 2);
1713 }
1714
1715 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1716 for (i = 0 ; i < 128 ; i += 8)
1717 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1718
1719 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1720 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1721 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1722 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1723}
1724
1725static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1726 int qman_id, dma_addr_t qman_pq_addr)
1727{
1728 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1729 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1730 u32 q_off, dma_qm_offset;
1731 u32 dma_qm_err_cfg;
1732
1733 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1734
1735 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1736 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1737 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1738 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1739 so_base_en_lo = lower_32_bits(CFG_BASE +
1740 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1741 so_base_en_hi = upper_32_bits(CFG_BASE +
1742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1743 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1744 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1745 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1746 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1747 so_base_ws_lo = lower_32_bits(CFG_BASE +
1748 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1749 so_base_ws_hi = upper_32_bits(CFG_BASE +
1750 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1751
1752 q_off = dma_qm_offset + qman_id * 4;
1753
1754 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1755 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1756
1757 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1758 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1759 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1760
1761 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1762 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1763 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1764
1765 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1766 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1767 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1768 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1769 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1770 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1771 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1772 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1773
1774	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1775
1776	/* The following configuration is needed only once per QMAN */
1777 if (qman_id == 0) {
1778 /* Configure RAZWI IRQ */
1779 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1780 if (hdev->stop_on_err) {
1781 dma_qm_err_cfg |=
1782 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1783 }
1784
1785 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1786 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1787 lower_32_bits(CFG_BASE +
1788 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1789 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1790 upper_32_bits(CFG_BASE +
1791 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1792 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1793 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1794 dma_id);
1795
1796 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1797 QM_ARB_ERR_MSG_EN_MASK);
1798
1799 /* Increase ARB WDT to support streams architecture */
1800 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1801 GAUDI_ARB_WDT_TIMEOUT);
1802
1803 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1804 QMAN_EXTERNAL_MAKE_TRUSTED);
1805
1806 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1807 }
1808}
1809
1810static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1811{
1812 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1813 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1814
1815 /* Set to maximum possible according to physical size */
1816 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1817 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1818
1819	/* STOP_ON bit implies no completion of the operation in case of RAZWI */
1820 if (hdev->stop_on_err)
1821 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1822
1823 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1824 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1825 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1826 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1827 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1828 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1829 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1830 WREG32(mmDMA0_CORE_PROT + dma_offset,
1831 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1832 /* If the channel is secured, it should be in MMU bypass mode */
1833 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1834 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1835 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1836}
1837
1838static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1839 u32 enable_mask)
1840{
1841 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1842
1843 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1844}
1845
1846static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1847{
1848 struct gaudi_device *gaudi = hdev->asic_specific;
1849 struct hl_hw_queue *q;
1850 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1851
1852 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1853 return;
1854
1855 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1856 dma_id = gaudi_dma_assignment[i];
1857 /*
1858		 * For queues after the CPU Q, we need to add 1 to get the
1859		 * correct queue index. In addition, we need to add the CPU EQ
1860		 * and NIC IRQs in order to get the correct MSI register.
1861 */
1862 if (dma_id > 1) {
1863 cpu_skip = 1;
1864 nic_skip = NIC_NUMBER_OF_ENGINES;
1865 } else {
1866 cpu_skip = 0;
1867 nic_skip = 0;
1868 }
1869
1870 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1871 q_idx = 4 * dma_id + j + cpu_skip;
1872 q = &hdev->kernel_queues[q_idx];
1873 q->cq_id = cq_id++;
1874 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1875 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1876 q->bus_address);
1877 }
1878
1879 gaudi_init_dma_core(hdev, dma_id);
1880
1881 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1882 }
1883
1884 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1885}
1886
1887static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1888 int qman_id, u64 qman_base_addr)
1889{
1890 u32 mtr_base_lo, mtr_base_hi;
1891 u32 so_base_lo, so_base_hi;
1892 u32 q_off, dma_qm_offset;
1893 u32 dma_qm_err_cfg;
1894
1895 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1896
1897 mtr_base_lo = lower_32_bits(CFG_BASE +
1898 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1899 mtr_base_hi = upper_32_bits(CFG_BASE +
1900 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1901 so_base_lo = lower_32_bits(CFG_BASE +
1902 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1903 so_base_hi = upper_32_bits(CFG_BASE +
1904 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1905
1906 q_off = dma_qm_offset + qman_id * 4;
1907
1908 if (qman_id < 4) {
1909 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
1910 lower_32_bits(qman_base_addr));
1911 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
1912 upper_32_bits(qman_base_addr));
1913
1914 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
1915 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1916 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1917
1918 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
1919 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
1920 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1921 } else {
1922 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1923 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1924 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1925
1926 /* Configure RAZWI IRQ */
1927 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1928 if (hdev->stop_on_err) {
1929 dma_qm_err_cfg |=
1930 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1931 }
1932 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1933
1934 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1935 lower_32_bits(CFG_BASE +
1936 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1937 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1938 upper_32_bits(CFG_BASE +
1939 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1940 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1941 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1942 dma_id);
1943
1944 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1945 QM_ARB_ERR_MSG_EN_MASK);
1946
1947 /* Increase ARB WDT to support streams architecture */
1948 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1949 GAUDI_ARB_WDT_TIMEOUT);
1950
1951 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1952 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1953 QMAN_INTERNAL_MAKE_TRUSTED);
1954 }
1955
1956 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
1957 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
1958 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
1959 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
1960}
1961
1962static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
1963{
1964 struct gaudi_device *gaudi = hdev->asic_specific;
1965 struct gaudi_internal_qman_info *q;
1966 u64 qman_base_addr;
1967 int i, j, dma_id, internal_q_index;
1968
1969 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
1970 return;
1971
1972 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
1973 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
1974
1975 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1976 /*
1977 * Add the CPU queue in order to get the correct queue
1978			 * number, as all internal queues are placed after it
1979 */
1980 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
1981
1982 q = &gaudi->internal_qmans[internal_q_index];
1983 qman_base_addr = (u64) q->pq_dma_addr;
1984 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
1985 qman_base_addr);
1986 }
1987
1988 /* Initializing lower CP for HBM DMA QMAN */
1989 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
1990
1991 gaudi_init_dma_core(hdev, dma_id);
1992
1993 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
1994 }
1995
1996 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
1997}
1998
1999static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2000 int qman_id, u64 qman_base_addr)
2001{
2002 u32 mtr_base_lo, mtr_base_hi;
2003 u32 so_base_lo, so_base_hi;
2004 u32 q_off, mme_id;
2005 u32 mme_qm_err_cfg;
2006
2007 mtr_base_lo = lower_32_bits(CFG_BASE +
2008 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2009 mtr_base_hi = upper_32_bits(CFG_BASE +
2010 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2011 so_base_lo = lower_32_bits(CFG_BASE +
2012 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2013 so_base_hi = upper_32_bits(CFG_BASE +
2014 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2015
2016 q_off = mme_offset + qman_id * 4;
2017
2018 if (qman_id < 4) {
2019 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2020 lower_32_bits(qman_base_addr));
2021 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2022 upper_32_bits(qman_base_addr));
2023
2024 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2025 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2026 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2027
2028 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2029 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2030 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2031 } else {
2032 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2033 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2034 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2035
2036 /* Configure RAZWI IRQ */
2037 mme_id = mme_offset /
2038 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2039
2040 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2041 if (hdev->stop_on_err) {
2042 mme_qm_err_cfg |=
2043 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2044 }
2045 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2046 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2047 lower_32_bits(CFG_BASE +
2048 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2049 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2050 upper_32_bits(CFG_BASE +
2051 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2052 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2053 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2054 mme_id);
2055
2056 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2057 QM_ARB_ERR_MSG_EN_MASK);
2058
2059 /* Increase ARB WDT to support streams architecture */
2060 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2061 GAUDI_ARB_WDT_TIMEOUT);
2062
2063 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2064 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2065 QMAN_INTERNAL_MAKE_TRUSTED);
2066 }
2067
2068 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2069 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2070 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2071 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2072}
2073
2074static void gaudi_init_mme_qmans(struct hl_device *hdev)
2075{
2076 struct gaudi_device *gaudi = hdev->asic_specific;
2077 struct gaudi_internal_qman_info *q;
2078 u64 qman_base_addr;
2079 u32 mme_offset;
2080 int i, internal_q_index;
2081
2082 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2083 return;
2084
2085 /*
2086 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2087 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2088 */
2089
2090 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2091
2092 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2093 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2094 q = &gaudi->internal_qmans[internal_q_index];
2095 qman_base_addr = (u64) q->pq_dma_addr;
2096 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2097 qman_base_addr);
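		/*
		 * The first four streams belong to the N_W MME (mmMME2_QM_BASE);
		 * once they are configured, switch the offset to the S_W MME
		 * (mmMME0_QM_BASE) for the remaining streams.
		 */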
2098 if (i == 3)
2099 mme_offset = 0;
2100 }
2101
2102 /* Initializing lower CP for MME QMANs */
2103 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2104 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2105 gaudi_init_mme_qman(hdev, 0, 4, 0);
2106
2107 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2108 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2109
2110 gaudi->hw_cap_initialized |= HW_CAP_MME;
2111}
2112
2113static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2114 int qman_id, u64 qman_base_addr)
2115{
2116 u32 mtr_base_lo, mtr_base_hi;
2117 u32 so_base_lo, so_base_hi;
2118 u32 q_off, tpc_id;
2119 u32 tpc_qm_err_cfg;
2120
2121 mtr_base_lo = lower_32_bits(CFG_BASE +
2122 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2123 mtr_base_hi = upper_32_bits(CFG_BASE +
2124 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2125 so_base_lo = lower_32_bits(CFG_BASE +
2126 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2127 so_base_hi = upper_32_bits(CFG_BASE +
2128 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2129
2130 q_off = tpc_offset + qman_id * 4;
2131
2132 if (qman_id < 4) {
2133 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2134 lower_32_bits(qman_base_addr));
2135 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2136 upper_32_bits(qman_base_addr));
2137
2138 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2139 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2140 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2141
2142 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2143 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2144 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2145 } else {
2146 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2147 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2148 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2149
2150 /* Configure RAZWI IRQ */
2151 tpc_id = tpc_offset /
2152 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2153
2154 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2155 if (hdev->stop_on_err) {
2156 tpc_qm_err_cfg |=
2157 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2158 }
2159
2160 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2161 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2162 lower_32_bits(CFG_BASE +
2163 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2164 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2165 upper_32_bits(CFG_BASE +
2166 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2167 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2168 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2169 tpc_id);
2170
2171 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2172 QM_ARB_ERR_MSG_EN_MASK);
2173
2174 /* Increase ARB WDT to support streams architecture */
2175 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2176 GAUDI_ARB_WDT_TIMEOUT);
2177
2178 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2179 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2180 QMAN_INTERNAL_MAKE_TRUSTED);
2181 }
2182
2183 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2184 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2185 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2186 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2187}
2188
2189static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2190{
2191 struct gaudi_device *gaudi = hdev->asic_specific;
2192 struct gaudi_internal_qman_info *q;
2193 u64 qman_base_addr;
2194 u32 so_base_hi, tpc_offset = 0;
2195 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2196 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2197 int i, tpc_id, internal_q_index;
2198
2199 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2200 return;
2201
2202 so_base_hi = upper_32_bits(CFG_BASE +
2203 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2204
2205 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2206 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2207 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2208 tpc_id * QMAN_STREAMS + i;
2209 q = &gaudi->internal_qmans[internal_q_index];
2210 qman_base_addr = (u64) q->pq_dma_addr;
2211 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2212 qman_base_addr);
2213
2214 if (i == 3) {
2215 /* Initializing lower CP for TPC QMAN */
2216 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2217
2218 /* Enable the QMAN and TPC channel */
2219 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2220 QMAN_TPC_ENABLE);
2221 }
2222 }
2223
2224 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2225 so_base_hi);
2226
2227 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2228
2229 gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id);
2230 }
2231}
2232
2233static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2234{
2235 struct gaudi_device *gaudi = hdev->asic_specific;
2236
2237 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2238 return;
2239
2240 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2241 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2242 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2243}
2244
2245static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2246{
2247 struct gaudi_device *gaudi = hdev->asic_specific;
2248
2249 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2250 return;
2251
2252 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2253 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2254 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2255 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2256 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2257}
2258
2259static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2260{
2261 struct gaudi_device *gaudi = hdev->asic_specific;
2262
2263 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2264 return;
2265
2266 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2267 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2268}
2269
2270static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2271{
2272 struct gaudi_device *gaudi = hdev->asic_specific;
2273 u32 tpc_offset = 0;
2274 int tpc_id;
2275
2276 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2277 return;
2278
2279 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2280 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2281 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2282 }
2283}
2284
2285static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2286{
2287 struct gaudi_device *gaudi = hdev->asic_specific;
2288
2289 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2290 return;
2291
2292 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2293 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2294 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2295 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2296}
2297
2298static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2299{
2300 struct gaudi_device *gaudi = hdev->asic_specific;
2301
2302 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2303 return;
2304
2305 /* Stop CPs of HBM DMA QMANs */
2306
2307 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2308 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2309 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2310 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2311 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2312}
2313
2314static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2315{
2316 struct gaudi_device *gaudi = hdev->asic_specific;
2317
2318 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2319 return;
2320
2321 /* Stop CPs of MME QMANs */
2322 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2323 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2324}
2325
2326static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2327{
2328 struct gaudi_device *gaudi = hdev->asic_specific;
2329
2330 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2331 return;
2332
2333 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2334 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2335 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2336 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2337 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2338 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2339 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2340 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2341}
2342
2343static void gaudi_pci_dma_stall(struct hl_device *hdev)
2344{
2345 struct gaudi_device *gaudi = hdev->asic_specific;
2346
2347 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2348 return;
2349
2350 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2351 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2352 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2353}
2354
2355static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2356{
2357 struct gaudi_device *gaudi = hdev->asic_specific;
2358
2359 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2360 return;
2361
2362 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2363 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2364 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2365 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2366 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2367}
2368
2369static void gaudi_mme_stall(struct hl_device *hdev)
2370{
2371 struct gaudi_device *gaudi = hdev->asic_specific;
2372
2373 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2374 return;
2375
2376 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2377 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2378 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2379 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2380 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2381 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2382 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2383 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2384 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2385 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2386 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2387 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2388 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2389 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2390 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2391 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2392 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2393}
2394
2395static void gaudi_tpc_stall(struct hl_device *hdev)
2396{
2397 struct gaudi_device *gaudi = hdev->asic_specific;
2398
2399 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2400 return;
2401
2402 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2403 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2404 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2405 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2406 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2407 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2408 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2409 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2410}
2411
2412static void gaudi_enable_clock_gating(struct hl_device *hdev)
2413{
2414 struct gaudi_device *gaudi = hdev->asic_specific;
2415 u32 qman_offset;
2416 int i;
2417
2418 if (!hdev->clock_gating)
2419 return;
2420
2421 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)
2422 return;
2423
2424	/* In case we are in a debug session, don't enable the clock gate
2425 * as it may interfere
2426 */
2427 if (hdev->in_debug)
2428 return;
2429
2430 for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2431 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2432 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2433 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2434 QMAN_UPPER_CP_CGM_PWR_GATE_EN);
2435 }
2436
2437 for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2438 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2439 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2440 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2441 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2442 }
2443
2444 WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2445 WREG32(mmMME0_QM_CGM_CFG,
2446 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2447 WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2448 WREG32(mmMME2_QM_CGM_CFG,
2449 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2450
2451 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2452 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2453 QMAN_CGM1_PWR_GATE_EN);
2454 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2455 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2456
2457 qman_offset += TPC_QMAN_OFFSET;
2458 }
2459
2460 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2461}
2462
2463static void gaudi_disable_clock_gating(struct hl_device *hdev)
2464{
2465 struct gaudi_device *gaudi = hdev->asic_specific;
2466 u32 qman_offset;
2467 int i;
2468
2469 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2470 return;
2471
2472 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2473 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2474 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2475
2476 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2477 }
2478
2479 WREG32(mmMME0_QM_CGM_CFG, 0);
2480 WREG32(mmMME0_QM_CGM_CFG1, 0);
2481 WREG32(mmMME2_QM_CGM_CFG, 0);
2482 WREG32(mmMME2_QM_CGM_CFG1, 0);
2483
2484 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2485 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2486 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2487
2488 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2489 }
2490
2491 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2492}
2493
2494static void gaudi_enable_timestamp(struct hl_device *hdev)
2495{
2496 /* Disable the timestamp counter */
2497 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2498
2499 /* Zero the lower/upper parts of the 64-bit counter */
2500 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2501 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2502
2503 /* Enable the counter */
2504 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2505}
2506
2507static void gaudi_disable_timestamp(struct hl_device *hdev)
2508{
2509 /* Disable the timestamp counter */
2510 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2511}
2512
2513static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2514{
2515 u32 wait_timeout_ms, cpu_timeout_ms;
2516
2517 dev_info(hdev->dev,
2518 "Halting compute engines and disabling interrupts\n");
2519
2520 if (hdev->pldm) {
2521 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2522 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2523 } else {
2524 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2525 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
2526 }
2527
2528 if (hard_reset) {
2529 /*
2530		 * We don't know what the state of the CPU is, so make sure it
2531		 * is stopped by any means necessary
2532 */
2533 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
2534 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2535					GAUDI_EVENT_HALT_MACHINE);
2536		msleep(cpu_timeout_ms);
2537 }
2538
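	/*
	 * Halt flow: first stop the QMANs from dispatching new work, then
	 * stall the engines themselves, and only then disable the QMANs.
	 */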
2539 gaudi_stop_mme_qmans(hdev);
2540 gaudi_stop_tpc_qmans(hdev);
2541 gaudi_stop_hbm_dma_qmans(hdev);
2542 gaudi_stop_pci_dma_qmans(hdev);
2543
2544 gaudi_disable_clock_gating(hdev);
2545
2546 msleep(wait_timeout_ms);
2547
2548 gaudi_pci_dma_stall(hdev);
2549 gaudi_hbm_dma_stall(hdev);
2550 gaudi_tpc_stall(hdev);
2551 gaudi_mme_stall(hdev);
2552
2553 msleep(wait_timeout_ms);
2554
2555 gaudi_disable_mme_qmans(hdev);
2556 gaudi_disable_tpc_qmans(hdev);
2557 gaudi_disable_hbm_dma_qmans(hdev);
2558 gaudi_disable_pci_dma_qmans(hdev);
2559
2560 gaudi_disable_timestamp(hdev);
2561
2562 if (hard_reset)
2563 gaudi_disable_msi(hdev);
2564 else
2565 gaudi_sync_irqs(hdev);
2566}
2567
2568static int gaudi_mmu_init(struct hl_device *hdev)
2569{
2570 struct asic_fixed_properties *prop = &hdev->asic_prop;
2571 struct gaudi_device *gaudi = hdev->asic_specific;
2572 u64 hop0_addr;
2573 int rc, i;
2574
2575 if (!hdev->mmu_enable)
2576 return 0;
2577
2578 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2579 return 0;
2580
2581 hdev->dram_supports_virtual_memory = false;
2582
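	/* Each ASID gets its own hop0 table inside the MMU page-tables area */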
2583 for (i = 0 ; i < prop->max_asid ; i++) {
2584 hop0_addr = prop->mmu_pgt_addr +
2585 (i * prop->mmu_hop_table_size);
2586
2587 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2588 if (rc) {
2589 dev_err(hdev->dev,
2590 "failed to set hop0 addr for asid %d\n", i);
2591 goto err;
2592 }
2593 }
2594
2595 /* init MMU cache manage page */
2596 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2597 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2598
2599 hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2600 VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2601
2602 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2603 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2604
2605 WREG32(mmSTLB_HOP_CONFIGURATION,
2606 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2607
2608	/*
2609 * The H/W expects the first PI after init to be 1. After wraparound
2610 * we'll write 0.
2611 */
2612 gaudi->mmu_cache_inv_pi = 1;
2613
2614	gaudi->hw_cap_initialized |= HW_CAP_MMU;
2615
2616 return 0;
2617
2618err:
2619 return rc;
2620}
2621
2622static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2623{
2624 void __iomem *dst;
2625
2626 /* HBM scrambler must be initialized before pushing F/W to HBM */
2627 gaudi_init_scrambler_hbm(hdev);
2628
2629 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2630
2631 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2632}
2633
2634static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2635{
2636 void __iomem *dst;
2637
2638 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2639
2640 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2641}
2642
2643static void gaudi_read_device_fw_version(struct hl_device *hdev,
2644 enum hl_fw_component fwc)
2645{
2646 const char *name;
2647 u32 ver_off;
2648 char *dest;
2649
2650 switch (fwc) {
2651 case FW_COMP_UBOOT:
2652 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2653 dest = hdev->asic_prop.uboot_ver;
2654 name = "U-Boot";
2655 break;
2656 case FW_COMP_PREBOOT:
2657 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2658 dest = hdev->asic_prop.preboot_ver;
2659 name = "Preboot";
2660 break;
2661 default:
2662 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2663 return;
2664 }
2665
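	/*
	 * The version offset is reported as an absolute SRAM address;
	 * strip the SRAM base to get an offset within the SRAM BAR.
	 */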
2666 ver_off &= ~((u32)SRAM_BASE_ADDR);
2667
2668 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2669 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2670 VERSION_MAX_LEN);
2671 } else {
2672 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2673 name, ver_off);
2674 strcpy(dest, "unavailable");
2675 }
2676}
2677
2678static int gaudi_init_cpu(struct hl_device *hdev)
2679{
2680 struct gaudi_device *gaudi = hdev->asic_specific;
2681 int rc;
2682
2683 if (!hdev->cpu_enable)
2684 return 0;
2685
2686 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2687 return 0;
2688
2689 /*
2690	 * The device CPU works with 40-bit addresses.
2691 * This register sets the extension to 50 bits.
2692 */
2693 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2694
2695 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2696 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2697 mmCPU_CMD_STATUS_TO_HOST,
2698 mmCPU_BOOT_ERR0,
2699 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2700 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2701
2702 if (rc)
2703 return rc;
2704
2705 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2706
2707 return 0;
2708}
2709
2710static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2711{
2712 struct gaudi_device *gaudi = hdev->asic_specific;
2713 struct hl_eq *eq;
2714 u32 status;
2715 struct hl_hw_queue *cpu_pq =
2716 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2717 int err;
2718
2719 if (!hdev->cpu_queues_enable)
2720 return 0;
2721
2722 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2723 return 0;
2724
2725 eq = &hdev->event_queue;
2726
2727 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2728 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2729
2730 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2731 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2732
2733 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2734 lower_32_bits(hdev->cpu_accessible_dma_address));
2735 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2736 upper_32_bits(hdev->cpu_accessible_dma_address));
2737
2738 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2739 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2740 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2741
2742 /* Used for EQ CI */
2743 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2744
2745 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2746
2747 if (gaudi->multi_msi_mode)
2748 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2749 else
2750 WREG32(mmCPU_IF_QUEUE_INIT,
2751 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2752
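	/* Notify the device CPU (PI update event) that the queue configuration is ready */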
2753 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2754
2755 err = hl_poll_timeout(
2756 hdev,
2757 mmCPU_IF_QUEUE_INIT,
2758 status,
2759 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2760 1000,
2761 cpu_timeout);
2762
2763 if (err) {
2764 dev_err(hdev->dev,
2765 "Failed to communicate with ARM CPU (ArmCP timeout)\n");
2766 return -EIO;
2767 }
2768
2769 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2770 return 0;
2771}
2772
2773static void gaudi_pre_hw_init(struct hl_device *hdev)
2774{
2775 /* Perform read from the device to make sure device is up */
2776 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2777
2778 /*
2779 * Let's mark in the H/W that we have reached this point. We check
2780 * this value in the reset_before_init function to understand whether
2781 * we need to reset the chip before doing H/W init. This register is
2782 * cleared by the H/W upon H/W reset
2783 */
2784 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2785
2786 /* Set the access through PCI bars (Linux driver only) as secured */
2787 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2788 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2789
2790 /* Perform read to flush the waiting writes to ensure configuration
2791 * was set in the device
2792 */
2793 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2794
2795 if (hdev->axi_drain) {
2796 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2797 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2798 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2799 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2800
2801 /* Perform read to flush the DRAIN cfg */
2802 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2803 } else {
2804 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2805 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2806
2807 /* Perform read to flush the DRAIN cfg */
2808 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2809 }
2810
2811 /* Configure the reset registers. Must be done as early as possible
2812 * in case we fail during H/W initialization
2813 */
2814 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2815 (CFG_RST_H_DMA_MASK |
2816 CFG_RST_H_MME_MASK |
2817 CFG_RST_H_SM_MASK |
2818 CFG_RST_H_TPC_MASK));
2819
2820 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2821
2822 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2823 (CFG_RST_H_HBM_MASK |
2824 CFG_RST_H_TPC_MASK |
2825 CFG_RST_H_NIC_MASK |
2826 CFG_RST_H_SM_MASK |
2827 CFG_RST_H_DMA_MASK |
2828 CFG_RST_H_MME_MASK |
2829 CFG_RST_H_CPU_MASK |
2830 CFG_RST_H_MMU_MASK));
2831
2832 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2833 (CFG_RST_L_IF_MASK |
2834 CFG_RST_L_PSOC_MASK |
2835 CFG_RST_L_TPC_MASK));
2836}
2837
2838static int gaudi_hw_init(struct hl_device *hdev)
2839{
2840 int rc;
2841
2842 dev_info(hdev->dev, "Starting initialization of H/W\n");
2843
2844 gaudi_pre_hw_init(hdev);
2845
2846 gaudi_init_pci_dma_qmans(hdev);
2847
2848 gaudi_init_hbm_dma_qmans(hdev);
2849
2850 /*
2851	 * Before pushing u-boot/Linux to the device, we need to set the HBM
2852	 * BAR to the base address of the DRAM
2853 */
2854 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2855 dev_err(hdev->dev,
2856 "failed to map HBM bar to DRAM base address\n");
2857 return -EIO;
2858 }
2859
2860 rc = gaudi_init_cpu(hdev);
2861 if (rc) {
2862 dev_err(hdev->dev, "failed to initialize CPU\n");
2863 return rc;
2864 }
2865
2866 /* SRAM scrambler must be initialized after CPU is running from HBM */
2867 gaudi_init_scrambler_sram(hdev);
2868
2869 /* This is here just in case we are working without CPU */
2870 gaudi_init_scrambler_hbm(hdev);
2871
2872 gaudi_init_golden_registers(hdev);
2873
2874 rc = gaudi_mmu_init(hdev);
2875 if (rc)
2876 return rc;
2877
2878	gaudi_init_security(hdev);
2879
2880	gaudi_init_mme_qmans(hdev);
2881
2882 gaudi_init_tpc_qmans(hdev);
2883
2884 gaudi_enable_clock_gating(hdev);
2885
2886 gaudi_enable_timestamp(hdev);
2887
2888 /* MSI must be enabled before CPU queues are initialized */
2889 rc = gaudi_enable_msi(hdev);
2890 if (rc)
2891 goto disable_queues;
2892
2893 /* must be called after MSI was enabled */
2894 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2895 if (rc) {
2896 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2897 rc);
2898 goto disable_msi;
2899 }
2900
2901 /* Perform read from the device to flush all configuration */
2902 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2903
2904 return 0;
2905
2906disable_msi:
2907 gaudi_disable_msi(hdev);
2908disable_queues:
2909 gaudi_disable_mme_qmans(hdev);
2910 gaudi_disable_pci_dma_qmans(hdev);
2911
2912 return rc;
2913}
2914
2915static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
2916{
2917 struct gaudi_device *gaudi = hdev->asic_specific;
2918 u32 status, reset_timeout_ms, boot_strap = 0;
2919
2920 if (hdev->pldm) {
2921 if (hard_reset)
2922 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
2923 else
2924 reset_timeout_ms = GAUDI_PLDM_SRESET_TIMEOUT_MSEC;
2925 } else {
2926 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
2927 }
2928
2929 if (hard_reset) {
2930 /* Tell ASIC not to re-initialize PCIe */
2931 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
2932
2933 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
2934 /* H/W bug WA:
2935 * rdata[31:0] = strap_read_val;
2936 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
2937 */
2938 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
2939 (boot_strap & 0x001FFFFF));
2940 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
2941
2942 /* Restart BTL/BLR upon hard-reset */
2943 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
2944
2945 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
2946 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
2947 dev_info(hdev->dev,
2948 "Issued HARD reset command, going to wait %dms\n",
2949 reset_timeout_ms);
2950 } else {
2951 /* Don't restart BTL/BLR upon soft-reset */
2952 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 0);
2953
2954 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST,
2955 1 << PSOC_GLOBAL_CONF_SOFT_RST_IND_SHIFT);
2956 dev_info(hdev->dev,
2957 "Issued SOFT reset command, going to wait %dms\n",
2958 reset_timeout_ms);
2959 }
2960
2961 /*
2962 * After hard reset, we can't poll the BTM_FSM register because the PSOC
2963 * itself is in reset. Need to wait until the reset is deasserted
2964 */
2965 msleep(reset_timeout_ms);
2966
2967 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2968 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2969 dev_err(hdev->dev,
2970 "Timeout while waiting for device to reset 0x%x\n",
2971 status);
2972
2973 if (!hard_reset) {
2974 gaudi->hw_cap_initialized &= ~(HW_CAP_PCI_DMA | HW_CAP_MME |
2975 HW_CAP_TPC_MASK |
2976 HW_CAP_HBM_DMA);
2977
2978 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2979 GAUDI_EVENT_SOFT_RESET);
2980 return;
2981 }
2982
2983 /* We continue here only for hard-reset */
2984
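	/* Write back the boot strap value that was computed before the reset */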
2985 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
2986
2987 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2988 HW_CAP_HBM | HW_CAP_PCI_DMA |
2989 HW_CAP_MME | HW_CAP_TPC_MASK |
2990 HW_CAP_HBM_DMA | HW_CAP_PLL |
2991 HW_CAP_MMU |
2992 HW_CAP_SRAM_SCRAMBLER |
2993 HW_CAP_HBM_SCRAMBLER);
2994 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
2995}
2996
2997static int gaudi_suspend(struct hl_device *hdev)
2998{
2999 int rc;
3000
3001 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
3002 if (rc)
3003 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3004
3005 return rc;
3006}
3007
3008static int gaudi_resume(struct hl_device *hdev)
3009{
3010 return gaudi_init_iatu(hdev);
3011}
3012
3013static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3014 u64 kaddress, phys_addr_t paddress, u32 size)
3015{
3016 int rc;
3017
3018 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3019 VM_DONTCOPY | VM_NORESERVE;
3020
3021 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
3022 size, vma->vm_page_prot);
3023 if (rc)
3024 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
3025
3026 return rc;
3027}
3028
3029static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3030{
3031 struct gaudi_device *gaudi = hdev->asic_specific;
3032 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3033 int dma_id;
3034 bool invalid_queue = false;
3035
3036 switch (hw_queue_id) {
3037 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3038 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3039 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3040 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3041 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3042 break;
3043
3044 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3045 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3046 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3047 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3048 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3049 break;
3050
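	/*
	 * From DMA_2 onwards the queue IDs are shifted by one because the
	 * CPU queue is placed right after the DMA_1 queues, hence the
	 * (hw_queue_id - 1) when extracting the stream index.
	 */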
3051 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3052 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3053 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3054 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3055 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3056 break;
3057
3058 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3059 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3060 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3061 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3062 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3063 break;
3064
3065 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3066 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3067 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3068 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3069 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3070 break;
3071
3072 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3073 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3074 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3075 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3076 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3077 break;
3078
3079 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3080 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3081 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3082 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3083 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3084 break;
3085
3086 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3087 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3088 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3089 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3090 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3091 break;
3092
3093 case GAUDI_QUEUE_ID_CPU_PQ:
3094 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3095 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3096 else
3097 invalid_queue = true;
3098 break;
3099
3100 case GAUDI_QUEUE_ID_MME_0_0:
3101 db_reg_offset = mmMME2_QM_PQ_PI_0;
3102 break;
3103
3104 case GAUDI_QUEUE_ID_MME_0_1:
3105 db_reg_offset = mmMME2_QM_PQ_PI_1;
3106 break;
3107
3108 case GAUDI_QUEUE_ID_MME_0_2:
3109 db_reg_offset = mmMME2_QM_PQ_PI_2;
3110 break;
3111
3112 case GAUDI_QUEUE_ID_MME_0_3:
3113 db_reg_offset = mmMME2_QM_PQ_PI_3;
3114 break;
3115
3116 case GAUDI_QUEUE_ID_MME_1_0:
3117 db_reg_offset = mmMME0_QM_PQ_PI_0;
3118 break;
3119
3120 case GAUDI_QUEUE_ID_MME_1_1:
3121 db_reg_offset = mmMME0_QM_PQ_PI_1;
3122 break;
3123
3124 case GAUDI_QUEUE_ID_MME_1_2:
3125 db_reg_offset = mmMME0_QM_PQ_PI_2;
3126 break;
3127
3128 case GAUDI_QUEUE_ID_MME_1_3:
3129 db_reg_offset = mmMME0_QM_PQ_PI_3;
3130 break;
3131
3132 case GAUDI_QUEUE_ID_TPC_0_0:
3133 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3134 break;
3135
3136 case GAUDI_QUEUE_ID_TPC_0_1:
3137 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3138 break;
3139
3140 case GAUDI_QUEUE_ID_TPC_0_2:
3141 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3142 break;
3143
3144 case GAUDI_QUEUE_ID_TPC_0_3:
3145 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3146 break;
3147
3148 case GAUDI_QUEUE_ID_TPC_1_0:
3149 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3150 break;
3151
3152 case GAUDI_QUEUE_ID_TPC_1_1:
3153 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3154 break;
3155
3156 case GAUDI_QUEUE_ID_TPC_1_2:
3157 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3158 break;
3159
3160 case GAUDI_QUEUE_ID_TPC_1_3:
3161 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3162 break;
3163
3164 case GAUDI_QUEUE_ID_TPC_2_0:
3165 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3166 break;
3167
3168 case GAUDI_QUEUE_ID_TPC_2_1:
3169 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3170 break;
3171
3172 case GAUDI_QUEUE_ID_TPC_2_2:
3173 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3174 break;
3175
3176 case GAUDI_QUEUE_ID_TPC_2_3:
3177 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3178 break;
3179
3180 case GAUDI_QUEUE_ID_TPC_3_0:
3181 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3182 break;
3183
3184 case GAUDI_QUEUE_ID_TPC_3_1:
3185 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3186 break;
3187
3188 case GAUDI_QUEUE_ID_TPC_3_2:
3189 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3190 break;
3191
3192 case GAUDI_QUEUE_ID_TPC_3_3:
3193 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3194 break;
3195
3196 case GAUDI_QUEUE_ID_TPC_4_0:
3197 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3198 break;
3199
3200 case GAUDI_QUEUE_ID_TPC_4_1:
3201 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3202 break;
3203
3204 case GAUDI_QUEUE_ID_TPC_4_2:
3205 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3206 break;
3207
3208 case GAUDI_QUEUE_ID_TPC_4_3:
3209 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3210 break;
3211
3212 case GAUDI_QUEUE_ID_TPC_5_0:
3213 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3214 break;
3215
3216 case GAUDI_QUEUE_ID_TPC_5_1:
3217 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3218 break;
3219
3220 case GAUDI_QUEUE_ID_TPC_5_2:
3221 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3222 break;
3223
3224 case GAUDI_QUEUE_ID_TPC_5_3:
3225 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3226 break;
3227
3228 case GAUDI_QUEUE_ID_TPC_6_0:
3229 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3230 break;
3231
3232 case GAUDI_QUEUE_ID_TPC_6_1:
3233 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3234 break;
3235
3236 case GAUDI_QUEUE_ID_TPC_6_2:
3237 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3238 break;
3239
3240 case GAUDI_QUEUE_ID_TPC_6_3:
3241 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3242 break;
3243
3244 case GAUDI_QUEUE_ID_TPC_7_0:
3245 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3246 break;
3247
3248 case GAUDI_QUEUE_ID_TPC_7_1:
3249 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3250 break;
3251
3252 case GAUDI_QUEUE_ID_TPC_7_2:
3253 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3254 break;
3255
3256 case GAUDI_QUEUE_ID_TPC_7_3:
3257 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3258 break;
3259
3260 default:
3261 invalid_queue = true;
3262 }
3263
3264 if (invalid_queue) {
3265 /* Should never get here */
3266 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3267 hw_queue_id);
3268 return;
3269 }
3270
3271 db_value = pi;
3272
3273 /* ring the doorbell */
3274 WREG32(db_reg_offset, db_value);
3275
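	/* For the CPU queue, also notify the device CPU that the PI was updated */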
3276 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3277 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3278 GAUDI_EVENT_PI_UPDATE);
3279}
3280
3281static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3282 struct hl_bd *bd)
3283{
3284 __le64 *pbd = (__le64 *) bd;
3285
3286	/* The QMANs are in host memory, so a simple copy suffices */
3287 pqe[0] = pbd[0];
3288 pqe[1] = pbd[1];
3289}
3290
3291static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3292 dma_addr_t *dma_handle, gfp_t flags)
3293{
3294 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3295 dma_handle, flags);
3296
3297 /* Shift to the device's base physical address of host memory */
3298 if (kernel_addr)
3299 *dma_handle += HOST_PHYS_BASE;
3300
3301 return kernel_addr;
3302}
3303
3304static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3305 void *cpu_addr, dma_addr_t dma_handle)
3306{
3307 /* Cancel the device's base physical address of host memory */
3308 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3309
3310 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3311}
3312
3313static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3314 u32 queue_id, dma_addr_t *dma_handle,
3315 u16 *queue_len)
3316{
3317 struct gaudi_device *gaudi = hdev->asic_specific;
3318 struct gaudi_internal_qman_info *q;
3319
3320 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3321 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3322 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3323 return NULL;
3324 }
3325
3326 q = &gaudi->internal_qmans[queue_id];
3327 *dma_handle = q->pq_dma_addr;
3328 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3329
3330 return q->pq_kernel_addr;
3331}
3332
3333static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3334 u16 len, u32 timeout, long *result)
3335{
3336 struct gaudi_device *gaudi = hdev->asic_specific;
3337
3338 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3339 if (result)
3340 *result = 0;
3341 return 0;
3342 }
3343
3344 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3345 timeout, result);
3346}
3347
3348static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3349{
3350 struct packet_msg_prot *fence_pkt;
3351 dma_addr_t pkt_dma_addr;
3352 u32 fence_val, tmp, timeout_usec;
3353 dma_addr_t fence_dma_addr;
3354 u32 *fence_ptr;
3355 int rc;
3356
3357 if (hdev->pldm)
3358 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3359 else
3360 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3361
3362 fence_val = GAUDI_QMAN0_FENCE_VAL;
3363
3364 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3365 &fence_dma_addr);
3366 if (!fence_ptr) {
3367 dev_err(hdev->dev,
3368 "Failed to allocate memory for queue testing\n");
3369 return -ENOMEM;
3370 }
3371
3372 *fence_ptr = 0;
3373
3374 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3375 sizeof(struct packet_msg_prot),
3376 GFP_KERNEL, &pkt_dma_addr);
3377 if (!fence_pkt) {
3378 dev_err(hdev->dev,
3379 "Failed to allocate packet for queue testing\n");
3380 rc = -ENOMEM;
3381 goto free_fence_ptr;
3382 }
3383
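	/*
	 * Build a MSG_PROT packet that writes the fence value to the host
	 * buffer; polling that buffer below verifies that the queue
	 * consumed the packet.
	 */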
3384 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
3385 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
3386 (1 << GAUDI_PKT_CTL_MB_SHIFT);
3387 fence_pkt->ctl = cpu_to_le32(tmp);
3388 fence_pkt->value = cpu_to_le32(fence_val);
3389 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3390
3391 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3392 sizeof(struct packet_msg_prot),
3393 pkt_dma_addr);
3394 if (rc) {
3395 dev_err(hdev->dev,
3396 "Failed to send fence packet\n");
3397 goto free_pkt;
3398 }
3399
3400 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3401 1000, timeout_usec, true);
3402
3403 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3404
3405 if (rc == -ETIMEDOUT) {
3406 dev_err(hdev->dev,
3407 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3408 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3409 rc = -EIO;
3410 }
3411
3412free_pkt:
3413 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3414 pkt_dma_addr);
3415free_fence_ptr:
3416 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3417 fence_dma_addr);
3418 return rc;
3419}
3420
3421static int gaudi_test_cpu_queue(struct hl_device *hdev)
3422{
3423 struct gaudi_device *gaudi = hdev->asic_specific;
3424
3425 /*
3426	 * check the capability here because send_cpu_message() won't update the
3427	 * result value if the capability is not set
3428 */
3429 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3430 return 0;
3431
3432 return hl_fw_test_cpu_queue(hdev);
3433}
3434
3435static int gaudi_test_queues(struct hl_device *hdev)
3436{
3437 int i, rc, ret_val = 0;
3438
3439 for (i = 0 ; i < HL_MAX_QUEUES ; i++) {
3440 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3441 rc = gaudi_test_queue(hdev, i);
3442 if (rc)
3443 ret_val = -EINVAL;
3444 }
3445 }
3446
3447 rc = gaudi_test_cpu_queue(hdev);
3448 if (rc)
3449 ret_val = -EINVAL;
3450
3451 return ret_val;
3452}
3453
3454static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3455 gfp_t mem_flags, dma_addr_t *dma_handle)
3456{
3457 void *kernel_addr;
3458
3459 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3460 return NULL;
3461
3462 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3463
3464 /* Shift to the device's base physical address of host memory */
3465 if (kernel_addr)
3466 *dma_handle += HOST_PHYS_BASE;
3467
3468 return kernel_addr;
3469}
3470
3471static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3472 dma_addr_t dma_addr)
3473{
3474 /* Cancel the device's base physical address of host memory */
3475 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3476
3477 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3478}
3479
3480static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3481 size_t size, dma_addr_t *dma_handle)
3482{
3483 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3484}
3485
3486static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3487 size_t size, void *vaddr)
3488{
3489 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3490}
3491
3492static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3493 int nents, enum dma_data_direction dir)
3494{
3495 struct scatterlist *sg;
3496 int i;
3497
3498 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3499 return -ENOMEM;
3500
3501 /* Shift to the device's base physical address of host memory */
3502 for_each_sg(sgl, sg, nents, i)
3503 sg->dma_address += HOST_PHYS_BASE;
3504
3505 return 0;
3506}
3507
3508static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3509 int nents, enum dma_data_direction dir)
3510{
3511 struct scatterlist *sg;
3512 int i;
3513
3514 /* Cancel the device's base physical address of host memory */
3515 for_each_sg(sgl, sg, nents, i)
3516 sg->dma_address -= HOST_PHYS_BASE;
3517
3518 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3519}
3520
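/*
 * Walk the scatter-gather table, merging entries that are physically
 * contiguous (up to DMA_MAX_TRANSFER_SIZE per descriptor), and return the
 * number of bytes of LIN_DMA packets needed to describe the transfer.
 */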
3521static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3522 struct sg_table *sgt)
3523{
3524 struct scatterlist *sg, *sg_next_iter;
3525 u32 count, dma_desc_cnt;
3526 u64 len, len_next;
3527 dma_addr_t addr, addr_next;
3528
3529 dma_desc_cnt = 0;
3530
3531 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3532
3533 len = sg_dma_len(sg);
3534 addr = sg_dma_address(sg);
3535
3536 if (len == 0)
3537 break;
3538
3539 while ((count + 1) < sgt->nents) {
3540 sg_next_iter = sg_next(sg);
3541 len_next = sg_dma_len(sg_next_iter);
3542 addr_next = sg_dma_address(sg_next_iter);
3543
3544 if (len_next == 0)
3545 break;
3546
3547 if ((addr + len == addr_next) &&
3548 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3549 len += len_next;
3550 count++;
3551 sg = sg_next_iter;
3552 } else {
3553 break;
3554 }
3555 }
3556
3557 dma_desc_cnt++;
3558 }
3559
3560 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3561}
3562
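/*
 * Pin the user's host memory for this DMA packet (reusing an existing pin
 * from the same job when possible), DMA-map it, and grow the patched CB
 * size by the number of LIN_DMA packets the mapping will require.
 */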
3563static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3564 struct hl_cs_parser *parser,
3565 struct packet_lin_dma *user_dma_pkt,
3566 u64 addr, enum dma_data_direction dir)
3567{
3568 struct hl_userptr *userptr;
3569 int rc;
3570
3571 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3572 parser->job_userptr_list, &userptr))
3573 goto already_pinned;
3574
3575 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3576 if (!userptr)
3577 return -ENOMEM;
3578
3579 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3580 userptr);
3581 if (rc)
3582 goto free_userptr;
3583
3584 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3585
3586 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3587 userptr->sgt->nents, dir);
3588 if (rc) {
3589 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3590 goto unpin_memory;
3591 }
3592
3593 userptr->dma_mapped = true;
3594 userptr->dir = dir;
3595
3596already_pinned:
3597 parser->patched_cb_size +=
3598 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3599
3600 return 0;
3601
3602unpin_memory:
3603 hl_unpin_host_memory(hdev, userptr);
3604free_userptr:
3605 kfree(userptr);
3606 return rc;
3607}
3608
3609static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3610 struct hl_cs_parser *parser,
3611 struct packet_lin_dma *user_dma_pkt,
3612 bool src_in_host)
3613{
3614 enum dma_data_direction dir;
3615 bool skip_host_mem_pin = false, user_memset;
3616 u64 addr;
3617 int rc = 0;
3618
3619 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3620 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3621 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3622
3623 if (src_in_host) {
3624 if (user_memset)
3625 skip_host_mem_pin = true;
3626
3627 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3628 dir = DMA_TO_DEVICE;
3629 addr = le64_to_cpu(user_dma_pkt->src_addr);
3630 } else {
3631 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3632 dir = DMA_FROM_DEVICE;
3633 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3634 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3635 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3636 }
3637
3638 if (skip_host_mem_pin)
3639 parser->patched_cb_size += sizeof(*user_dma_pkt);
3640 else
3641 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3642 addr, dir);
3643
3644 return rc;
3645}
3646
3647static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3648 struct hl_cs_parser *parser,
3649 struct packet_lin_dma *user_dma_pkt)
3650{
3651 bool src_in_host = false;
3652 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3653 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3654 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3655
3656 dev_dbg(hdev->dev, "DMA packet details:\n");
3657 dev_dbg(hdev->dev, "source == 0x%llx\n",
3658 le64_to_cpu(user_dma_pkt->src_addr));
3659 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3660 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3661
3662 /*
3663 * Special handling for DMA with size 0. Bypass all validations
3664 * because no transactions will be done except for WR_COMP, which
3665 * is not a security issue
3666 */
3667 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3668 parser->patched_cb_size += sizeof(*user_dma_pkt);
3669 return 0;
3670 }
3671
3672 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3673 src_in_host = true;
3674
3675 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3676 src_in_host);
3677}
3678
3679static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3680 struct hl_cs_parser *parser,
3681 struct packet_load_and_exe *user_pkt)
3682{
3683 u32 cfg;
3684
3685 cfg = le32_to_cpu(user_pkt->cfg);
3686
3687 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3688 dev_err(hdev->dev,
3689 "User not allowed to use Load and Execute\n");
3690 return -EPERM;
3691 }
3692
3693 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3694
3695 return 0;
3696}
3697
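/*
 * First pass over the user CB: reject packets the user is not allowed to
 * submit, validate the rest, and compute the size of the patched CB,
 * including room for the two MSG_PROT packets appended at its end.
 */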
3698static int gaudi_validate_cb(struct hl_device *hdev,
3699 struct hl_cs_parser *parser, bool is_mmu)
3700{
3701 u32 cb_parsed_length = 0;
3702 int rc = 0;
3703
3704 parser->patched_cb_size = 0;
3705
3706	/* cb_user_size is more than 0 so the loop will always be executed */
3707 while (cb_parsed_length < parser->user_cb_size) {
3708 enum packet_id pkt_id;
3709 u16 pkt_size;
3710 struct gaudi_packet *user_pkt;
3711
3712 user_pkt = (struct gaudi_packet *) (uintptr_t)
3713 (parser->user_cb->kernel_address + cb_parsed_length);
3714
3715 pkt_id = (enum packet_id) (
3716 (le64_to_cpu(user_pkt->header) &
3717 PACKET_HEADER_PACKET_ID_MASK) >>
3718 PACKET_HEADER_PACKET_ID_SHIFT);
3719
3720 pkt_size = gaudi_packet_sizes[pkt_id];
3721 cb_parsed_length += pkt_size;
3722 if (cb_parsed_length > parser->user_cb_size) {
3723 dev_err(hdev->dev,
3724 "packet 0x%x is out of CB boundary\n", pkt_id);
3725 rc = -EINVAL;
3726 break;
3727 }
3728
3729 switch (pkt_id) {
3730 case PACKET_MSG_PROT:
3731 dev_err(hdev->dev,
3732 "User not allowed to use MSG_PROT\n");
3733 rc = -EPERM;
3734 break;
3735
3736 case PACKET_CP_DMA:
3737 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3738 rc = -EPERM;
3739 break;
3740
3741 case PACKET_STOP:
3742 dev_err(hdev->dev, "User not allowed to use STOP\n");
3743 rc = -EPERM;
3744 break;
3745
3746		case PACKET_LOAD_AND_EXE:
3747 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3748 (struct packet_load_and_exe *) user_pkt);
3749 break;
3750
3751		case PACKET_LIN_DMA:
3752 parser->contains_dma_pkt = true;
3753 if (is_mmu)
3754 parser->patched_cb_size += pkt_size;
3755 else
3756 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3757 (struct packet_lin_dma *) user_pkt);
3758 break;
3759
3760 case PACKET_WREG_32:
3761 case PACKET_WREG_BULK:
3762 case PACKET_MSG_LONG:
3763 case PACKET_MSG_SHORT:
3764 case PACKET_REPEAT:
3765 case PACKET_FENCE:
3766 case PACKET_NOP:
3767 case PACKET_ARB_POINT:
3768			parser->patched_cb_size += pkt_size;
3769 break;
3770
3771 default:
3772 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3773 pkt_id);
3774 rc = -EINVAL;
3775 break;
3776 }
3777
3778 if (rc)
3779 break;
3780 }
3781
3782 /*
3783 * The new CB should have space at the end for two MSG_PROT packets:
3784 * 1. A packet that will act as a completion packet
3785 * 2. A packet that will generate MSI-X interrupt
3786 */
3787 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3788
3789 return rc;
3790}
3791
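/*
 * Expand a single user LIN_DMA packet into one packet per merged
 * scatter-gather chunk. The engine barrier bit is cleared on all but the
 * first packet, and the user's WR_COMP setting is restored only on the
 * last one.
 */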
3792static int gaudi_patch_dma_packet(struct hl_device *hdev,
3793 struct hl_cs_parser *parser,
3794 struct packet_lin_dma *user_dma_pkt,
3795 struct packet_lin_dma *new_dma_pkt,
3796 u32 *new_dma_pkt_size)
3797{
3798 struct hl_userptr *userptr;
3799 struct scatterlist *sg, *sg_next_iter;
3800 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3801 u64 len, len_next;
3802 dma_addr_t dma_addr, dma_addr_next;
3803 u64 device_memory_addr, addr;
3804 enum dma_data_direction dir;
3805 struct sg_table *sgt;
3806 bool src_in_host = false;
3807 bool skip_host_mem_pin = false;
3808 bool user_memset;
3809
3810 ctl = le32_to_cpu(user_dma_pkt->ctl);
3811
3812 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3813 src_in_host = true;
3814
3815 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3816 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3817
3818 if (src_in_host) {
3819 addr = le64_to_cpu(user_dma_pkt->src_addr);
3820 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3821 dir = DMA_TO_DEVICE;
3822 if (user_memset)
3823 skip_host_mem_pin = true;
3824 } else {
3825 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3826 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3827 dir = DMA_FROM_DEVICE;
3828 }
3829
3830 if ((!skip_host_mem_pin) &&
3831 (!hl_userptr_is_pinned(hdev, addr,
3832 le32_to_cpu(user_dma_pkt->tsize),
3833 parser->job_userptr_list, &userptr))) {
3834 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3835			addr, le32_to_cpu(user_dma_pkt->tsize));
3836 return -EFAULT;
3837 }
3838
3839 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3840 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3841 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3842 return 0;
3843 }
3844
3845 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3846
3847 sgt = userptr->sgt;
3848 dma_desc_cnt = 0;
3849
3850 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3851 len = sg_dma_len(sg);
3852 dma_addr = sg_dma_address(sg);
3853
3854 if (len == 0)
3855 break;
3856
3857 while ((count + 1) < sgt->nents) {
3858 sg_next_iter = sg_next(sg);
3859 len_next = sg_dma_len(sg_next_iter);
3860 dma_addr_next = sg_dma_address(sg_next_iter);
3861
3862 if (len_next == 0)
3863 break;
3864
3865 if ((dma_addr + len == dma_addr_next) &&
3866 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3867 len += len_next;
3868 count++;
3869 sg = sg_next_iter;
3870 } else {
3871 break;
3872 }
3873 }
3874
3875 new_dma_pkt->ctl = user_dma_pkt->ctl;
3876
3877 ctl = le32_to_cpu(user_dma_pkt->ctl);
3878 if (likely(dma_desc_cnt))
3879 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3880 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3881 new_dma_pkt->ctl = cpu_to_le32(ctl);
3882 new_dma_pkt->tsize = cpu_to_le32(len);
3883
3884 if (dir == DMA_TO_DEVICE) {
3885 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3886 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3887 } else {
3888 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3889 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3890 }
3891
3892 if (!user_memset)
3893 device_memory_addr += len;
3894 dma_desc_cnt++;
3895 new_dma_pkt++;
3896 }
3897
3898 if (!dma_desc_cnt) {
3899 dev_err(hdev->dev,
3900 "Error of 0 SG entries when patching DMA packet\n");
3901 return -EFAULT;
3902 }
3903
3904 /* Fix the last dma packet - wrcomp must be as user set it */
3905 new_dma_pkt--;
3906 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
3907
3908 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3909
3910 return 0;
3911}
3912
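/*
 * Second pass over the user CB: copy allowed packets verbatim into the
 * patched CB and replace every LIN_DMA packet with its patched expansion.
 */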
3913static int gaudi_patch_cb(struct hl_device *hdev,
3914 struct hl_cs_parser *parser)
3915{
3916 u32 cb_parsed_length = 0;
3917 u32 cb_patched_cur_length = 0;
3918 int rc = 0;
3919
3920	/* cb_user_size is more than 0 so the loop will always be executed */
3921 while (cb_parsed_length < parser->user_cb_size) {
3922 enum packet_id pkt_id;
3923 u16 pkt_size;
3924 u32 new_pkt_size = 0;
3925 struct gaudi_packet *user_pkt, *kernel_pkt;
3926
3927 user_pkt = (struct gaudi_packet *) (uintptr_t)
3928 (parser->user_cb->kernel_address + cb_parsed_length);
3929 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
3930 (parser->patched_cb->kernel_address +
3931 cb_patched_cur_length);
3932
3933 pkt_id = (enum packet_id) (
3934 (le64_to_cpu(user_pkt->header) &
3935 PACKET_HEADER_PACKET_ID_MASK) >>
3936 PACKET_HEADER_PACKET_ID_SHIFT);
3937
3938 pkt_size = gaudi_packet_sizes[pkt_id];
3939 cb_parsed_length += pkt_size;
3940 if (cb_parsed_length > parser->user_cb_size) {
3941 dev_err(hdev->dev,
3942 "packet 0x%x is out of CB boundary\n", pkt_id);
3943 rc = -EINVAL;
3944 break;
3945 }
3946
3947 switch (pkt_id) {
3948 case PACKET_LIN_DMA:
3949 rc = gaudi_patch_dma_packet(hdev, parser,
3950 (struct packet_lin_dma *) user_pkt,
3951 (struct packet_lin_dma *) kernel_pkt,
3952 &new_pkt_size);
3953 cb_patched_cur_length += new_pkt_size;
3954 break;
3955
3956 case PACKET_MSG_PROT:
3957 dev_err(hdev->dev,
3958 "User not allowed to use MSG_PROT\n");
3959 rc = -EPERM;
3960 break;
3961
3962 case PACKET_CP_DMA:
3963 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3964 rc = -EPERM;
3965 break;
3966
3967 case PACKET_STOP:
3968 dev_err(hdev->dev, "User not allowed to use STOP\n");
3969 rc = -EPERM;
3970 break;
3971
3972 case PACKET_WREG_32:
3973 case PACKET_WREG_BULK:
3974 case PACKET_MSG_LONG:
3975 case PACKET_MSG_SHORT:
3976 case PACKET_REPEAT:
3977 case PACKET_FENCE:
3978 case PACKET_NOP:
3979 case PACKET_ARB_POINT:
3980 case PACKET_LOAD_AND_EXE:
3981 memcpy(kernel_pkt, user_pkt, pkt_size);
3982 cb_patched_cur_length += pkt_size;
3983 break;
3984
3985 default:
3986 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3987 pkt_id);
3988 rc = -EINVAL;
3989 break;
3990 }
3991
3992 if (rc)
3993 break;
3994 }
3995
3996 return rc;
3997}
3998
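/*
 * With the MMU enabled there is no address patching to do, so the user CB
 * is copied as-is into a kernel-owned CB, two MSG_PROT packets worth of
 * space are reserved at its end, and the copy is validated.
 */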
3999static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4000 struct hl_cs_parser *parser)
4001{
4002 u64 patched_cb_handle;
4003 u32 patched_cb_size;
4004 struct hl_cb *user_cb;
4005 int rc;
4006
4007 /*
4008	 * The new CB should have space at the end for two MSG_PROT packets:
4009 * 1. A packet that will act as a completion packet
4010 * 2. A packet that will generate MSI interrupt
4011 */
4012 parser->patched_cb_size = parser->user_cb_size +
4013 sizeof(struct packet_msg_prot) * 2;
4014
4015 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4016 parser->patched_cb_size,
4017 &patched_cb_handle, HL_KERNEL_ASID_ID);
4018
4019 if (rc) {
4020 dev_err(hdev->dev,
4021 "Failed to allocate patched CB for DMA CS %d\n",
4022 rc);
4023 return rc;
4024 }
4025
4026 patched_cb_handle >>= PAGE_SHIFT;
4027 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4028 (u32) patched_cb_handle);
4029 /* hl_cb_get should never fail here so use kernel WARN */
4030 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4031 (u32) patched_cb_handle);
4032 if (!parser->patched_cb) {
4033 rc = -EFAULT;
4034 goto out;
4035 }
4036
4037 /*
4038 * The check that parser->user_cb_size <= parser->user_cb->size was done
4039 * in validate_queue_index().
4040 */
4041 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4042 (void *) (uintptr_t) parser->user_cb->kernel_address,
4043 parser->user_cb_size);
4044
4045 patched_cb_size = parser->patched_cb_size;
4046
4047 /* Validate patched CB instead of user CB */
4048 user_cb = parser->user_cb;
4049 parser->user_cb = parser->patched_cb;
4050 rc = gaudi_validate_cb(hdev, parser, true);
4051 parser->user_cb = user_cb;
4052
4053 if (rc) {
4054 hl_cb_put(parser->patched_cb);
4055 goto out;
4056 }
4057
4058 if (patched_cb_size != parser->patched_cb_size) {
4059 dev_err(hdev->dev, "user CB size mismatch\n");
4060 hl_cb_put(parser->patched_cb);
4061 rc = -EINVAL;
4062 goto out;
4063 }
4064
4065out:
4066 /*
4067	 * Always call cb destroy here because we still hold one reference
4068	 * to it from the earlier cb_get. After the job completes, cb_put
4069	 * will release it, but here we want to remove it from the
4070	 * idr
4071 */
4072 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4073 patched_cb_handle << PAGE_SHIFT);
4074
4075 return rc;
4076}
4077
4078static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4079 struct hl_cs_parser *parser)
4080{
4081 u64 patched_cb_handle;
4082 int rc;
4083
4084 rc = gaudi_validate_cb(hdev, parser, false);
4085
4086 if (rc)
4087 goto free_userptr;
4088
4089 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4090 parser->patched_cb_size,
4091 &patched_cb_handle, HL_KERNEL_ASID_ID);
4092 if (rc) {
4093 dev_err(hdev->dev,
4094 "Failed to allocate patched CB for DMA CS %d\n", rc);
4095 goto free_userptr;
4096 }
4097
4098 patched_cb_handle >>= PAGE_SHIFT;
4099 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4100 (u32) patched_cb_handle);
4101 /* hl_cb_get should never fail here so use kernel WARN */
4102 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4103 (u32) patched_cb_handle);
4104 if (!parser->patched_cb) {
4105 rc = -EFAULT;
4106 goto out;
4107 }
4108
4109 rc = gaudi_patch_cb(hdev, parser);
4110
4111 if (rc)
4112 hl_cb_put(parser->patched_cb);
4113
4114out:
4115 /*
4116	 * Always call cb destroy here because we still hold one reference
4117	 * to it from the earlier cb_get. After the job completes, cb_put
4118	 * will release it, but here we want to remove it from the
4119	 * idr
4120 */
4121 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4122 patched_cb_handle << PAGE_SHIFT);
4123
4124free_userptr:
4125 if (rc)
4126 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4127 return rc;
4128}
4129
4130static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4131 struct hl_cs_parser *parser)
4132{
4133 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4134
4135 /* For internal queue jobs just check if CB address is valid */
4136 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4137 parser->user_cb_size,
4138 asic_prop->sram_user_base_address,
4139 asic_prop->sram_end_address))
4140 return 0;
4141
4142 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4143 parser->user_cb_size,
4144 asic_prop->dram_user_base_address,
4145 asic_prop->dram_end_address))
4146 return 0;
4147
4148 /* PMMU and HPMMU addresses are equal, check only one of them */
4149 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4150 parser->user_cb_size,
4151 asic_prop->pmmu.start_addr,
4152 asic_prop->pmmu.end_addr))
4153 return 0;
4154
4155 dev_err(hdev->dev,
4156 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4157 parser->user_cb, parser->user_cb_size);
4158
4159 return -EFAULT;
4160}
4161
4162static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4163{
4164 struct gaudi_device *gaudi = hdev->asic_specific;
4165
4166 if (parser->queue_type == QUEUE_TYPE_INT)
4167 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4168
4169 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4170 return gaudi_parse_cb_mmu(hdev, parser);
4171 else
4172 return gaudi_parse_cb_no_mmu(hdev, parser);
4173}
4174
4175static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4176 u64 kernel_address, u32 len,
4177 u64 cq_addr, u32 cq_val, u32 msi_vec,
4178 bool eb)
4179{
4180 struct gaudi_device *gaudi = hdev->asic_specific;
4181 struct packet_msg_prot *cq_pkt;
4182 u32 tmp;
4183
4184 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4185 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4186
4187 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4188 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4189
4190 if (eb)
4191 tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT);
4192
4193 cq_pkt->ctl = cpu_to_le32(tmp);
4194 cq_pkt->value = cpu_to_le32(cq_val);
4195 cq_pkt->addr = cpu_to_le64(cq_addr);
4196
4197 cq_pkt++;
4198
4199 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4200 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4201 cq_pkt->ctl = cpu_to_le32(tmp);
4202 cq_pkt->value = cpu_to_le32(1);
4203
4204 if (!gaudi->multi_msi_mode)
4205 msi_vec = 0;
4206
4207 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4208}
4209
4210static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4211{
4212 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4213}
4214
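/*
 * Fill a device memory range with a 64-bit pattern by building a single
 * memset LIN_DMA packet in a kernel CB and running it as a driver job on
 * the DMA_0_0 queue (QMAN0).
 */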
4215static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4216 u32 size, u64 val)
4217{
4218 struct packet_lin_dma *lin_dma_pkt;
4219 struct hl_cs_job *job;
4220 u32 cb_size, ctl;
4221 struct hl_cb *cb;
4222 int rc;
4223
4224 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
4225 if (!cb)
4226 return -EFAULT;
4227
4228 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4229 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4230 cb_size = sizeof(*lin_dma_pkt);
4231
4232 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4233 (1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4234 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
4235 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
4236 (1 << GAUDI_PKT_CTL_MB_SHIFT));
4237 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4238 lin_dma_pkt->src_addr = cpu_to_le64(val);
4239 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4240 lin_dma_pkt->tsize = cpu_to_le32(size);
4241
4242 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4243 if (!job) {
4244 dev_err(hdev->dev, "Failed to allocate a new job\n");
4245 rc = -ENOMEM;
4246 goto release_cb;
4247 }
4248
4249 job->id = 0;
4250 job->user_cb = cb;
4251 job->user_cb->cs_cnt++;
4252 job->user_cb_size = cb_size;
4253 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4254 job->patched_cb = job->user_cb;
4255 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4256
4257 hl_debugfs_add_job(hdev, job);
4258
4259 rc = gaudi_send_job_on_qman0(hdev, job);
4260
4261 hl_debugfs_remove_job(hdev, job);
4262 kfree(job);
4263 cb->cs_cnt--;
4264
4265release_cb:
4266 hl_cb_put(cb);
4267 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4268
4269 return rc;
4270}
4271
4272static void gaudi_restore_sm_registers(struct hl_device *hdev)
4273{
4274 int i;
4275
4276 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4277 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4278 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4279 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4280 }
4281
4282 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4283 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4284 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4285 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4286 }
4287
4288 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4289
4290 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4291 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4292
4293 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4294
4295 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4296 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4297}
4298
4299static void gaudi_restore_dma_registers(struct hl_device *hdev)
4300{
4301 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4302 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4303 int i;
4304
4305 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4306 u64 sob_addr = CFG_BASE +
4307 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4308 (i * sob_delta);
4309 u32 dma_offset = i * DMA_CORE_OFFSET;
4310
4311 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4312 lower_32_bits(sob_addr));
4313 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4314 upper_32_bits(sob_addr));
4315 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4316
4317 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4318 * modified by the user for SRAM reduction
4319 */
4320 if (i > 1)
4321 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4322 0x00000001);
4323 }
4324}
4325
4326static void gaudi_restore_qm_registers(struct hl_device *hdev)
4327{
4328 u32 qman_offset;
4329 int i;
4330
4331 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4332 qman_offset = i * DMA_QMAN_OFFSET;
4333 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4334 }
4335
4336 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4337 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4338 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4339 }
4340
4341 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4342 qman_offset = i * TPC_QMAN_OFFSET;
4343 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4344 }
4345}
4346
4347static void gaudi_restore_user_registers(struct hl_device *hdev)
4348{
4349 gaudi_restore_sm_registers(hdev);
4350 gaudi_restore_dma_registers(hdev);
4351 gaudi_restore_qm_registers(hdev);
4352}
4353
4354static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4355{
4356 struct asic_fixed_properties *prop = &hdev->asic_prop;
4357 u64 addr = prop->sram_user_base_address;
4358 u32 size = hdev->pldm ? 0x10000 :
4359 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4360 u64 val = 0x7777777777777777ull;
4361 int rc;
4362
4363 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4364 if (rc) {
4365 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4366 return rc;
4367 }
4368
4369 gaudi_mmu_prepare(hdev, asid);
4370
4371 gaudi_restore_user_registers(hdev);
4372
4373 return 0;
4374}
4375
4376static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4377{
4378 struct asic_fixed_properties *prop = &hdev->asic_prop;
4379 struct gaudi_device *gaudi = hdev->asic_specific;
4380 u64 addr = prop->mmu_pgt_addr;
4381 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4382
4383 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4384 return 0;
4385
4386 return gaudi_memset_device_memory(hdev, addr, size, 0);
4387}
4388
4389static void gaudi_restore_phase_topology(struct hl_device *hdev)
4390{
4391
4392}
4393
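/*
 * The debugfs accessors below route each access according to the address:
 * configuration registers (unless clock gating is enabled), SRAM through
 * its PCI BAR, HBM by temporarily moving the HBM BAR window, or host
 * physical memory when no IOMMU is present.
 */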
4394static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4395{
4396 struct asic_fixed_properties *prop = &hdev->asic_prop;
4397 struct gaudi_device *gaudi = hdev->asic_specific;
4398 u64 hbm_bar_addr;
4399 int rc = 0;
4400
4401 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4402 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4403 dev_err_ratelimited(hdev->dev,
4404 "Can't read register - clock gating is enabled!\n");
4405 rc = -EFAULT;
4406 } else {
4407 *val = RREG32(addr - CFG_BASE);
4408 }
4409 } else if ((addr >= SRAM_BASE_ADDR) &&
4410 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4411 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4412 (addr - SRAM_BASE_ADDR));
4413 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4414 u64 bar_base_addr = DRAM_PHYS_BASE +
4415 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4416
4417 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4418 if (hbm_bar_addr != U64_MAX) {
4419 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4420 (addr - bar_base_addr));
4421
4422 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4423 hbm_bar_addr);
4424 }
4425 if (hbm_bar_addr == U64_MAX)
4426 rc = -EIO;
4427 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4428 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4429 } else {
4430 rc = -EFAULT;
4431 }
4432
4433 return rc;
4434}
4435
4436static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4437{
4438 struct asic_fixed_properties *prop = &hdev->asic_prop;
4439 struct gaudi_device *gaudi = hdev->asic_specific;
4440 u64 hbm_bar_addr;
4441 int rc = 0;
4442
4443 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4444 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4445 dev_err_ratelimited(hdev->dev,
4446 "Can't write register - clock gating is enabled!\n");
4447 rc = -EFAULT;
4448 } else {
4449 WREG32(addr - CFG_BASE, val);
4450 }
4451 } else if ((addr >= SRAM_BASE_ADDR) &&
4452 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4453 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4454 (addr - SRAM_BASE_ADDR));
4455 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4456 u64 bar_base_addr = DRAM_PHYS_BASE +
4457 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4458
4459 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4460 if (hbm_bar_addr != U64_MAX) {
4461 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4462 (addr - bar_base_addr));
4463
4464 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4465 hbm_bar_addr);
4466 }
4467 if (hbm_bar_addr == U64_MAX)
4468 rc = -EIO;
4469 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4470 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4471 } else {
4472 rc = -EFAULT;
4473 }
4474
4475 return rc;
4476}
4477
4478static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4479{
4480 struct asic_fixed_properties *prop = &hdev->asic_prop;
4481 struct gaudi_device *gaudi = hdev->asic_specific;
4482 u64 hbm_bar_addr;
4483 int rc = 0;
4484
4485 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4486 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4487 dev_err_ratelimited(hdev->dev,
4488 "Can't read register - clock gating is enabled!\n");
4489 rc = -EFAULT;
4490 } else {
4491 u32 val_l = RREG32(addr - CFG_BASE);
4492 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4493
4494 *val = (((u64) val_h) << 32) | val_l;
4495 }
4496 } else if ((addr >= SRAM_BASE_ADDR) &&
4497 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4498 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4499 (addr - SRAM_BASE_ADDR));
4500 } else if (addr <=
4501 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4502 u64 bar_base_addr = DRAM_PHYS_BASE +
4503 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4504
4505 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4506 if (hbm_bar_addr != U64_MAX) {
4507 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4508 (addr - bar_base_addr));
4509
4510 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4511 hbm_bar_addr);
4512 }
4513 if (hbm_bar_addr == U64_MAX)
4514 rc = -EIO;
4515 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4516 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4517 } else {
4518 rc = -EFAULT;
4519 }
4520
4521 return rc;
4522}
4523
4524static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4525{
4526 struct asic_fixed_properties *prop = &hdev->asic_prop;
4527 struct gaudi_device *gaudi = hdev->asic_specific;
4528 u64 hbm_bar_addr;
4529 int rc = 0;
4530
4531 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4532 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4533 dev_err_ratelimited(hdev->dev,
4534 "Can't write register - clock gating is enabled!\n");
4535 rc = -EFAULT;
4536 } else {
4537 WREG32(addr - CFG_BASE, lower_32_bits(val));
4538 WREG32(addr + sizeof(u32) - CFG_BASE,
4539 upper_32_bits(val));
4540 }
4541 } else if ((addr >= SRAM_BASE_ADDR) &&
4542 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4543 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4544 (addr - SRAM_BASE_ADDR));
4545 } else if (addr <=
4546 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4547 u64 bar_base_addr = DRAM_PHYS_BASE +
4548 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4549
4550 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4551 if (hbm_bar_addr != U64_MAX) {
4552 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4553 (addr - bar_base_addr));
4554
4555 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4556 hbm_bar_addr);
4557 }
4558 if (hbm_bar_addr == U64_MAX)
4559 rc = -EIO;
4560 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4561 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4562 } else {
4563 rc = -EFAULT;
4564 }
4565
4566 return rc;
4567}
4568
4569static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4570{
4571 struct gaudi_device *gaudi = hdev->asic_specific;
4572
4573 if (hdev->hard_reset_pending)
4574 return U64_MAX;
4575
4576 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4577 (addr - gaudi->hbm_bar_cur_addr));
4578}
4579
4580static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4581{
4582 struct gaudi_device *gaudi = hdev->asic_specific;
4583
4584 if (hdev->hard_reset_pending)
4585 return;
4586
4587 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4588 (addr - gaudi->hbm_bar_cur_addr));
4589}
4590
4591static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4592{
4593 /* mask to zero the MMBP and ASID bits */
4594 WREG32_AND(reg, ~0x7FF);
4595 WREG32_OR(reg, asid);
4596}
4597
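/*
 * Program the context's ASID (with MMU bypass cleared) into the QMAN
 * non-secure properties and the ARUSER/AWUSER registers of every engine,
 * so all of their transactions are translated by the MMU under this ASID.
 * Clock gating is disabled around the register writes.
 */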
4598static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4599{
4600 struct gaudi_device *gaudi = hdev->asic_specific;
4601
4602 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4603 return;
4604
4605 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4606 WARN(1, "asid %u is too big\n", asid);
4607 return;
4608 }
4609
4610 mutex_lock(&gaudi->clk_gate_mutex);
4611
4612 hdev->asic_funcs->disable_clock_gating(hdev);
4613
4614 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4615 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4616 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4617 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4618 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4619
4620 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4621 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4622 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4623 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4624 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4625
4626 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4627 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4628 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4629 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4630 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4631
4632 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4633 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4634 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4635 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4636 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4637
4638 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4639 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4640 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4641 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4642 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4643
4644 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4645 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4646 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4647 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4648 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4649
4650 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4651 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4652 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4653 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4654 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4655
4656 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4657 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4658 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4659 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4660 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4661
4662 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4663 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4664 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4665 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4666 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4667 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4668 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4669 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4670
4671 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4672 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4673 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4674 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4675 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4676 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4677 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4678
4679 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4680 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4681 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4682 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4683 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4684 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4685 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4686
4687 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4688 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4689 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4690 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4691 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4692 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4693 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4694
4695 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4696 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4697 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4698 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4699 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4700 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4701 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4702
4703 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4704 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4705 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4706 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4707 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4708 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4709 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4710
4711 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4712 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4713 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4714 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4715 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4716 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4717 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4718
4719 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4720 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4721 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4722 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4723 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4724 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4725 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4726
4727 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4728 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4729 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4730 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4731 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4732 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4733 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4734
4735 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4736 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4737 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4738 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4739 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4740 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4741 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4742 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4743 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4744 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4745
4746 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4747 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4748 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4749 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4750 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4751 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4752 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4753 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4754 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4755 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4756 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4757 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4758
4759 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4760 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4761
4762 hdev->asic_funcs->enable_clock_gating(hdev);
4763
4764 mutex_unlock(&gaudi->clk_gate_mutex);
4765}
4766
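/*
 * Submit a driver-internal job on the DMA_0_0 queue. The device must be
 * idle; the DMA core protection bit is raised for the duration of the job
 * and the CB's trailing MSG_PROT packet is filled with a fence write that
 * is polled for completion.
 */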
4767static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4768 struct hl_cs_job *job)
4769{
4770 struct packet_msg_prot *fence_pkt;
4771 u32 *fence_ptr;
4772 dma_addr_t fence_dma_addr;
4773 struct hl_cb *cb;
4774 u32 tmp, timeout, dma_offset;
4775 int rc;
4776
4777 if (hdev->pldm)
4778 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4779 else
4780 timeout = HL_DEVICE_TIMEOUT_USEC;
4781
4782 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4783 dev_err_ratelimited(hdev->dev,
4784 "Can't send driver job on QMAN0 because the device is not idle\n");
4785 return -EBUSY;
4786 }
4787
4788 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4789 &fence_dma_addr);
4790 if (!fence_ptr) {
4791 dev_err(hdev->dev,
4792 "Failed to allocate fence memory for QMAN0\n");
4793 return -ENOMEM;
4794 }
4795
4796 cb = job->patched_cb;
4797
4798 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4799 job->job_cb_size - sizeof(struct packet_msg_prot));
4800
4801 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4802 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
4803 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4804 fence_pkt->ctl = cpu_to_le32(tmp);
4805 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4806 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4807
4808 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4809
4810 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4811
4812 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4813 job->job_cb_size, cb->bus_address);
4814 if (rc) {
4815 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4816 goto free_fence_ptr;
4817 }
4818
4819 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4820 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4821 timeout, true);
4822
4823 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4824
4825 if (rc == -ETIMEDOUT) {
4826 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4827 goto free_fence_ptr;
4828 }
4829
4830free_fence_ptr:
4831 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4832 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4833
4834 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4835 fence_dma_addr);
4836 return rc;
4837}
4838
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004839static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4840{
Ofir Bittonebd8d122020-05-10 13:41:28 +03004841 if (event_type >= GAUDI_EVENT_SIZE)
4842 goto event_not_supported;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004843
Ofir Bittonebd8d122020-05-10 13:41:28 +03004844 if (!gaudi_irq_map_table[event_type].valid)
4845 goto event_not_supported;
4846
4847 snprintf(desc, size, gaudi_irq_map_table[event_type].name);
4848
4849 return;
4850
4851event_not_supported:
4852 snprintf(desc, size, "N/A");
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03004853}
4854
4855static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
4856 u32 x_y, bool is_write)
4857{
4858 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
4859
4860 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
4861 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
4862
4863 switch (x_y) {
4864 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
4865 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
4866 dma_id[0] = 0;
4867 dma_id[1] = 2;
4868 break;
4869 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
4870 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
4871 dma_id[0] = 1;
4872 dma_id[1] = 3;
4873 break;
4874 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
4875 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
4876 dma_id[0] = 4;
4877 dma_id[1] = 6;
4878 break;
4879 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
4880 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
4881 dma_id[0] = 5;
4882 dma_id[1] = 7;
4883 break;
4884 default:
4885 goto unknown_initiator;
4886 }
4887
4888 for (i = 0 ; i < 2 ; i++) {
4889 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
4890 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
4891 }
4892
4893 switch (x_y) {
4894 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
4895 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
4896 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4897 return "DMA0";
4898 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4899 return "DMA2";
4900 else
4901 return "DMA0 or DMA2";
4902 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
4903 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
4904 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4905 return "DMA1";
4906 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4907 return "DMA3";
4908 else
4909 return "DMA1 or DMA3";
4910 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
4911 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
4912 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4913 return "DMA4";
4914 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4915 return "DMA6";
4916 else
4917 return "DMA4 or DMA6";
4918 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
4919 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
4920 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4921 return "DMA5";
4922 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4923 return "DMA7";
4924 else
4925 return "DMA5 or DMA7";
4926 }
4927
4928unknown_initiator:
4929 return "unknown initiator";
4930}
4931
4932static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
4933 bool is_write)
4934{
4935 u32 val, x_y, axi_id;
4936
4937 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
4938 RREG32(mmMMU_UP_RAZWI_READ_ID);
4939 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
4940 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
4941 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
4942 RAZWI_INITIATOR_AXI_ID_SHIFT);
4943
4944 switch (x_y) {
4945 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
4946 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
4947 return "TPC0";
4948 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
4949 return "NIC0";
4950 break;
4951 case RAZWI_INITIATOR_ID_X_Y_TPC1:
4952 return "TPC1";
4953 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
4954 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
4955 return "MME0";
4956 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
4957 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
4958 return "MME1";
4959 case RAZWI_INITIATOR_ID_X_Y_TPC2:
4960 return "TPC2";
4961 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
4962 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
4963 return "TPC3";
4964 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
4965 return "PCI";
4966 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
4967 return "CPU";
4968 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
4969 return "PSOC";
4970 break;
4971 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
4972 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
4973 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
4974 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
4975 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
4976 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
4977 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
4978 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
4979 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
4980 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
4981 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
4982 return "TPC4";
4983 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
4984 return "NIC1";
4985 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
4986 return "NIC2";
4987 break;
4988 case RAZWI_INITIATOR_ID_X_Y_TPC5:
4989 return "TPC5";
4990 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
4991 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
4992 return "MME2";
4993 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
4994 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
4995 return "MME3";
4996 case RAZWI_INITIATOR_ID_X_Y_TPC6:
4997 return "TPC6";
4998 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
4999 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5000 return "TPC7";
5001 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5002 return "NIC4";
5003 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5004 return "NIC5";
5005 break;
5006 default:
5007 break;
5008 }
5009
5010 dev_err(hdev->dev,
5011 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5012 val,
5013 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5014 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5015 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5016 RAZWI_INITIATOR_AXI_ID_MASK);
5017
5018 return "unknown initiator";
5019}
5020
5021static void gaudi_print_razwi_info(struct hl_device *hdev)
5022{
5023 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5024 dev_err_ratelimited(hdev->dev,
5025 "RAZWI event caused by illegal write of %s\n",
5026 gaudi_get_razwi_initiator_name(hdev, true));
5027 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5028 }
5029
5030 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5031 dev_err_ratelimited(hdev->dev,
5032 "RAZWI event caused by illegal read of %s\n",
5033 gaudi_get_razwi_initiator_name(hdev, false));
5034 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5035 }
5036}
5037
5038static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5039{
5040 struct gaudi_device *gaudi = hdev->asic_specific;
5041 u64 addr;
5042 u32 val;
5043
5044 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5045 return;
5046
5047 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5048 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5049 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5050 addr <<= 32;
5051 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5052
5053 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5054 addr);
5055
5056 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5057 }
5058
5059 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5060 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5061 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5062 addr <<= 32;
5063 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5064
5065 dev_err_ratelimited(hdev->dev,
5066 "MMU access error on va 0x%llx\n", addr);
5067
5068 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5069 }
5070}
5071
5072/*
5073 * +-------------------+------------------------------------------------------+
5074 * | Configuration Reg | Description |
5075 * | Address | |
5076 * +-------------------+------------------------------------------------------+
5077 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5078 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5079 * | |0xF34 memory wrappers 63:32 |
5080 * | |0xF38 memory wrappers 95:64 |
5081 * | |0xF3C memory wrappers 127:96 |
5082 * +-------------------+------------------------------------------------------+
5083 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5084 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5085 * | |0xF44 memory wrappers 63:32 |
5086 * | |0xF48 memory wrappers 95:64 |
5087 * | |0xF4C memory wrappers 127:96 |
5088 * +-------------------+------------------------------------------------------+
5089 */
5090static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
5091 const char *block_name,
5092 u64 block_address, int num_memories,
5093 bool derr, bool disable_clock_gating)
5094{
5095 struct gaudi_device *gaudi = hdev->asic_specific;
5096 int num_mem_regs = num_memories / 32 + ((num_memories % 32) ? 1 : 0);
5097
5098 if (block_address >= CFG_BASE)
5099 block_address -= CFG_BASE;
5100
5101 if (derr)
5102 block_address += GAUDI_ECC_DERR0_OFFSET;
5103 else
5104 block_address += GAUDI_ECC_SERR0_OFFSET;
5105
5106 if (disable_clock_gating) {
5107 mutex_lock(&gaudi->clk_gate_mutex);
5108 hdev->asic_funcs->disable_clock_gating(hdev);
5109 }
5110
5111 switch (num_mem_regs) {
5112 case 1:
5113 dev_err(hdev->dev,
5114 "%s ECC indication: 0x%08x\n",
5115 block_name, RREG32(block_address));
5116 break;
5117 case 2:
5118 dev_err(hdev->dev,
5119 "%s ECC indication: 0x%08x 0x%08x\n",
5120 block_name,
5121 RREG32(block_address), RREG32(block_address + 4));
5122 break;
5123 case 3:
5124 dev_err(hdev->dev,
5125 "%s ECC indication: 0x%08x 0x%08x 0x%08x\n",
5126 block_name,
5127 RREG32(block_address), RREG32(block_address + 4),
5128 RREG32(block_address + 8));
5129 break;
5130 case 4:
5131 dev_err(hdev->dev,
5132 "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n",
5133 block_name,
5134 RREG32(block_address), RREG32(block_address + 4),
5135 RREG32(block_address + 8), RREG32(block_address + 0xc));
5136 break;
5137 default:
5138 break;
5139
5140 }
5141
5142 if (disable_clock_gating) {
5143 hdev->asic_funcs->enable_clock_gating(hdev);
5144 mutex_unlock(&gaudi->clk_gate_mutex);
5145 }
5146}
5147
5148static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5149 const char *qm_name,
5150 u64 glbl_sts_addr,
5151 u64 arb_err_addr)
5152{
5153 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5154 char reg_desc[32];
5155
5156 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5157 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5158 glbl_sts_clr_val = 0;
5159 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5160
5161 if (!glbl_sts_val)
5162 continue;
5163
5164 if (i == QMAN_STREAMS)
5165 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5166 else
5167 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5168
5169 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5170 if (glbl_sts_val & BIT(j)) {
5171 dev_err_ratelimited(hdev->dev,
5172 "%s %s. err cause: %s\n",
5173 qm_name, reg_desc,
5174 gaudi_qman_error_cause[j]);
5175 glbl_sts_clr_val |= BIT(j);
5176 }
5177 }
5178
5179 /* Write 1 to clear errors */
5180 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5181 }
5182
5183 arb_err_val = RREG32(arb_err_addr);
5184
5185 if (!arb_err_val)
5186 return;
5187
5188 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5189 if (arb_err_val & BIT(j)) {
5190 dev_err_ratelimited(hdev->dev,
5191 "%s ARB_ERR. err cause: %s\n",
5192 qm_name,
5193 gaudi_qman_arb_error_cause[j]);
5194 }
5195 }
5196}
5197
5198static void gaudi_print_ecc_info(struct hl_device *hdev, u16 event_type)
5199{
5200 u64 block_address;
5201 u8 index;
5202 int num_memories;
5203 char desc[32];
5204 bool derr;
5205 bool disable_clock_gating;
5206
5207 switch (event_type) {
5208 case GAUDI_EVENT_PCIE_CORE_SERR:
5209 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5210 block_address = mmPCIE_CORE_BASE;
5211 num_memories = 51;
5212 derr = false;
5213 disable_clock_gating = false;
5214 break;
5215 case GAUDI_EVENT_PCIE_CORE_DERR:
5216 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5217 block_address = mmPCIE_CORE_BASE;
5218 num_memories = 51;
5219 derr = true;
5220 disable_clock_gating = false;
5221 break;
5222 case GAUDI_EVENT_PCIE_IF_SERR:
5223 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5224 block_address = mmPCIE_WRAP_BASE;
5225 num_memories = 11;
5226 derr = false;
5227 disable_clock_gating = false;
5228 break;
5229 case GAUDI_EVENT_PCIE_IF_DERR:
5230 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5231 block_address = mmPCIE_WRAP_BASE;
5232 num_memories = 11;
5233 derr = true;
5234 disable_clock_gating = false;
5235 break;
5236 case GAUDI_EVENT_PCIE_PHY_SERR:
5237 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5238 block_address = mmPCIE_PHY_BASE;
5239 num_memories = 4;
5240 derr = false;
5241 disable_clock_gating = false;
5242 break;
5243 case GAUDI_EVENT_PCIE_PHY_DERR:
5244 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5245 block_address = mmPCIE_PHY_BASE;
5246 num_memories = 4;
5247 derr = true;
5248 disable_clock_gating = false;
5249 break;
5250 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5251 index = event_type - GAUDI_EVENT_TPC0_SERR;
5252 block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5253 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5254 num_memories = 90;
5255 derr = false;
5256 disable_clock_gating = true;
5257 break;
5258 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5259 index = event_type - GAUDI_EVENT_TPC0_DERR;
5260 block_address =
5261 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5262 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5263 num_memories = 90;
5264 derr = true;
5265 disable_clock_gating = true;
5266 break;
5267 case GAUDI_EVENT_MME0_ACC_SERR:
5268 case GAUDI_EVENT_MME1_ACC_SERR:
5269 case GAUDI_EVENT_MME2_ACC_SERR:
5270 case GAUDI_EVENT_MME3_ACC_SERR:
5271 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5272 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5273 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5274 num_memories = 128;
5275 derr = false;
5276 disable_clock_gating = true;
5277 break;
5278 case GAUDI_EVENT_MME0_ACC_DERR:
5279 case GAUDI_EVENT_MME1_ACC_DERR:
5280 case GAUDI_EVENT_MME2_ACC_DERR:
5281 case GAUDI_EVENT_MME3_ACC_DERR:
5282 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5283 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5284 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5285 num_memories = 128;
5286 derr = true;
5287 disable_clock_gating = true;
5288 break;
5289 case GAUDI_EVENT_MME0_SBAB_SERR:
5290 case GAUDI_EVENT_MME1_SBAB_SERR:
5291 case GAUDI_EVENT_MME2_SBAB_SERR:
5292 case GAUDI_EVENT_MME3_SBAB_SERR:
5293 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5294 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5295 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5296 num_memories = 33;
5297 derr = false;
5298 disable_clock_gating = true;
5299 break;
5300 case GAUDI_EVENT_MME0_SBAB_DERR:
5301 case GAUDI_EVENT_MME1_SBAB_DERR:
5302 case GAUDI_EVENT_MME2_SBAB_DERR:
5303 case GAUDI_EVENT_MME3_SBAB_DERR:
5304 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5305 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5306 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5307 num_memories = 33;
5308 derr = true;
5309 disable_clock_gating = true;
5310 break;
5311 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5312 index = event_type - GAUDI_EVENT_DMA0_SERR_ECC;
5313 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5314 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5315 num_memories = 16;
5316 derr = false;
5317 disable_clock_gating = false;
5318 break;
5319 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5320 index = event_type - GAUDI_EVENT_DMA0_DERR_ECC;
5321 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5322 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5323 num_memories = 16;
5324 derr = true;
5325 disable_clock_gating = false;
5326 break;
5327 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5328 block_address = mmCPU_IF_BASE;
5329 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5330 num_memories = 4;
5331 derr = false;
5332 disable_clock_gating = false;
5333 break;
5334 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5335 block_address = mmCPU_IF_BASE;
5336 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5337 num_memories = 4;
5338 derr = true;
5339 disable_clock_gating = false;
5340 break;
5341 case GAUDI_EVENT_PSOC_MEM_SERR:
5342 block_address = mmPSOC_GLOBAL_CONF_BASE;
5343 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5344 num_memories = 4;
5345 derr = false;
5346 disable_clock_gating = false;
5347 break;
5348 case GAUDI_EVENT_PSOC_MEM_DERR:
5349 block_address = mmPSOC_GLOBAL_CONF_BASE;
5350 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5351 num_memories = 4;
5352 derr = true;
5353 disable_clock_gating = false;
5354 break;
5355 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5356 block_address = mmPSOC_CS_TRACE_BASE;
5357 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5358 num_memories = 2;
5359 derr = false;
5360 disable_clock_gating = false;
5361 break;
5362 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5363 block_address = mmPSOC_CS_TRACE_BASE;
5364 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5365 num_memories = 2;
5366 derr = true;
5367 disable_clock_gating = false;
5368 break;
5369 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5370 index = event_type - GAUDI_EVENT_SRAM0_SERR;
5371 block_address =
5372 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5373 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5374 num_memories = 2;
5375 derr = false;
5376 disable_clock_gating = false;
5377 break;
5378 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5379 index = event_type - GAUDI_EVENT_SRAM0_DERR;
5380 block_address =
5381 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5382 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5383 num_memories = 2;
5384 derr = true;
5385 disable_clock_gating = false;
5386 break;
5387 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5388 index = event_type - GAUDI_EVENT_DMA_IF0_SERR;
5389 block_address = mmDMA_IF_W_S_BASE +
5390 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5391 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5392 num_memories = 60;
5393 derr = false;
5394 disable_clock_gating = false;
5395 break;
5396 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5397 index = event_type - GAUDI_EVENT_DMA_IF0_DERR;
5398 block_address = mmDMA_IF_W_S_BASE +
5399 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5400 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5401 derr = true;
5402 num_memories = 60;
5403 disable_clock_gating = false;
5404 break;
5405 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5406 index = event_type - GAUDI_EVENT_HBM_0_SERR;
5407 /* HBM Registers are at different offsets */
5408 block_address = mmHBM0_BASE + 0x8000 +
5409 index * (mmHBM1_BASE - mmHBM0_BASE);
5410 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5411 derr = false;
5412 num_memories = 64;
5413 disable_clock_gating = false;
5414 break;
5415 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5416 index = event_type - GAUDI_EVENT_HBM_0_DERR;
5417 /* HBM Registers are at different offsets */
5418 block_address = mmHBM0_BASE + 0x8000 +
5419 index * (mmHBM1_BASE - mmHBM0_BASE);
5420 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5421 derr = true;
5422 num_memories = 64;
5423 disable_clock_gating = false;
5424 break;
5425 default:
5426 return;
5427 }
5428
5429 gaudi_print_ecc_info_generic(hdev, desc, block_address, num_memories,
5430 derr, disable_clock_gating);
5431}
5432
5433static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5434{
5435 u64 glbl_sts_addr, arb_err_addr;
5436 u8 index;
5437 char desc[32];
5438
5439 switch (event_type) {
5440 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5441 index = event_type - GAUDI_EVENT_TPC0_QM;
5442 glbl_sts_addr =
5443 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5444 arb_err_addr =
5445 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5446 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5447 break;
5448 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5449 index = event_type - GAUDI_EVENT_MME0_QM;
5450 glbl_sts_addr =
5451 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5452 arb_err_addr =
5453 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5454 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5455 break;
5456 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5457 index = event_type - GAUDI_EVENT_DMA0_QM;
5458 glbl_sts_addr =
5459 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5460 arb_err_addr =
5461 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5462 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5463 break;
5464 default:
5465 return;
5466 }
5467
5468 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5469}
5470
5471static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5472 bool razwi)
5473{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005474 char desc[64] = "";
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005475
5476 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5477 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5478 event_type, desc);
5479
5480 gaudi_print_ecc_info(hdev, event_type);
5481
5482 if (razwi) {
5483 gaudi_print_razwi_info(hdev);
5484 gaudi_print_mmu_error_info(hdev);
5485 }
5486}
5487
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005488static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5489{
Ofir Bittonebd8d122020-05-10 13:41:28 +03005490 struct gaudi_device *gaudi = hdev->asic_specific;
5491
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005492 /* Unmask all IRQs since some could have been received
5493 * during the soft reset
5494 */
Ofir Bittonebd8d122020-05-10 13:41:28 +03005495 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005496}
5497
5498static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5499{
5500 int ch, err = 0;
5501 u32 base, val, val2;
5502
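	/* Each HBM channel has a 0x1000-byte register stride; the status words
	 * at offsets 0x06C and 0x07C cover its two pseudo-channels, reported
	 * below as pc 2*ch and pc 2*ch+1.
	 */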
5503 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5504 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5505 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5506 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5507 if (val) {
5508 err = 1;
5509 dev_err(hdev->dev,
5510 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5511 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5512 (val >> 2) & 0x1, (val >> 3) & 0x1,
5513 (val >> 4) & 0x1);
5514
5515 val2 = RREG32(base + ch * 0x1000 + 0x060);
5516 dev_err(hdev->dev,
5517 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5518 device, ch * 2,
5519 RREG32(base + ch * 0x1000 + 0x064),
5520 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5521 (val2 & 0xFF0000) >> 16,
5522 (val2 & 0xFF000000) >> 24);
5523 }
5524
5525 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5526 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5527 if (val) {
5528 err = 1;
5529 dev_err(hdev->dev,
5530 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5531 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5532 (val >> 2) & 0x1, (val >> 3) & 0x1,
5533 (val >> 4) & 0x1);
5534
5535 val2 = RREG32(base + ch * 0x1000 + 0x070);
5536 dev_err(hdev->dev,
5537 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5538 device, ch * 2 + 1,
5539 RREG32(base + ch * 0x1000 + 0x074),
5540 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5541 (val2 & 0xFF0000) >> 16,
5542 (val2 & 0xFF000000) >> 24);
5543 }
5544
5545 /* Clear interrupts */
5546 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5547 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5548 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5549 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5550 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5551 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5552 }
5553
5554 val = RREG32(base + 0x8F30);
5555 val2 = RREG32(base + 0x8F34);
5556 if (val | val2) {
5557 err = 1;
5558 dev_err(hdev->dev,
5559 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5560 device, val, val2);
5561 }
5562 val = RREG32(base + 0x8F40);
5563 val2 = RREG32(base + 0x8F44);
5564 if (val | val2) {
5565 err = 1;
5566 dev_err(hdev->dev,
5567 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5568 device, val, val2);
5569 }
5570
5571 return err;
5572}
5573
5574static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5575{
5576 switch (hbm_event_type) {
5577 case GAUDI_EVENT_HBM0_SPI_0:
5578 case GAUDI_EVENT_HBM0_SPI_1:
5579 return 0;
5580 case GAUDI_EVENT_HBM1_SPI_0:
5581 case GAUDI_EVENT_HBM1_SPI_1:
5582 return 1;
5583 case GAUDI_EVENT_HBM2_SPI_0:
5584 case GAUDI_EVENT_HBM2_SPI_1:
5585 return 2;
5586 case GAUDI_EVENT_HBM3_SPI_0:
5587 case GAUDI_EVENT_HBM3_SPI_1:
5588 return 3;
5589 default:
5590 break;
5591 }
5592
5593 /* Should never happen */
5594 return 0;
5595}
5596
5597static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5598 char *interrupt_name)
5599{
5600 struct gaudi_device *gaudi = hdev->asic_specific;
5601 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5602 bool soft_reset_required = false;
5603
5604 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5605 * gating, and thus cannot be done by ArmCP and should instead be done
5606 * by the driver.
5607 */
5608
5609 mutex_lock(&gaudi->clk_gate_mutex);
5610
5611 hdev->asic_funcs->disable_clock_gating(hdev);
5612
5613 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5614 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5615
5616 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5617 if (tpc_interrupts_cause & BIT(i)) {
5618 dev_err_ratelimited(hdev->dev,
5619 "TPC%d_%s interrupt cause: %s\n",
5620 tpc_id, interrupt_name,
5621 gaudi_tpc_interrupts_cause[i]);
5622 /* If this is QM error, we need to soft-reset */
5623 if (i == 15)
5624 soft_reset_required = true;
5625 }
5626
5627 /* Clear interrupts */
5628 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5629
5630 hdev->asic_funcs->enable_clock_gating(hdev);
5631
5632 mutex_unlock(&gaudi->clk_gate_mutex);
5633
5634 return soft_reset_required;
5635}
5636
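/*
 * The TPC DEC events are spaced two entries apart per TPC and the KRN_ERR
 * events six entries apart, hence the >> 1 and / 6 below (assumption
 * inferred from the arithmetic used in these helpers).
 */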
5637static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5638{
5639 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5640}
5641
5642static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5643{
5644 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5645}
5646
5647static void gaudi_print_clk_change_info(struct hl_device *hdev,
5648 u16 event_type)
5649{
5650 switch (event_type) {
5651 case GAUDI_EVENT_FIX_POWER_ENV_S:
5652 dev_info_ratelimited(hdev->dev,
5653 "Clock throttling due to power consumption\n");
5654 break;
5655
5656 case GAUDI_EVENT_FIX_POWER_ENV_E:
5657 dev_info_ratelimited(hdev->dev,
5658 "Power envelop is safe, back to optimal clock\n");
5659 break;
5660
5661 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5662 dev_info_ratelimited(hdev->dev,
5663 "Clock throttling due to overheating\n");
5664 break;
5665
5666 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5667 dev_info_ratelimited(hdev->dev,
5668 "Thermal envelop is safe, back to optimal clock\n");
5669 break;
5670
5671 default:
5672 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5673 event_type);
5674 break;
5675 }
5676}
5677
5678static void gaudi_handle_eqe(struct hl_device *hdev,
5679 struct hl_eq_entry *eq_entry)
5680{
5681 struct gaudi_device *gaudi = hdev->asic_specific;
5682 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5683 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5684 >> EQ_CTL_EVENT_TYPE_SHIFT);
5685 u8 cause;
Oded Gabbay66446822020-05-18 16:48:01 +03005686 bool reset_required;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005687
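	/* events_stat presumably tracks events since the last reset while
	 * events_stat_aggregate accumulates over the device lifetime; both are
	 * exposed via gaudi_get_events_stat().
	 */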
5688 gaudi->events_stat[event_type]++;
5689 gaudi->events_stat_aggregate[event_type]++;
5690
5691 switch (event_type) {
5692 case GAUDI_EVENT_PCIE_CORE_DERR:
5693 case GAUDI_EVENT_PCIE_IF_DERR:
5694 case GAUDI_EVENT_PCIE_PHY_DERR:
5695 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5696 case GAUDI_EVENT_MME0_ACC_DERR:
5697 case GAUDI_EVENT_MME0_SBAB_DERR:
5698 case GAUDI_EVENT_MME1_ACC_DERR:
5699 case GAUDI_EVENT_MME1_SBAB_DERR:
5700 case GAUDI_EVENT_MME2_ACC_DERR:
5701 case GAUDI_EVENT_MME2_SBAB_DERR:
5702 case GAUDI_EVENT_MME3_ACC_DERR:
5703 case GAUDI_EVENT_MME3_SBAB_DERR:
5704 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5705 fallthrough;
5706 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5707 case GAUDI_EVENT_PSOC_MEM_DERR:
5708 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5709 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5710 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5711 fallthrough;
5712 case GAUDI_EVENT_GIC500:
5713 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5714 case GAUDI_EVENT_MMU_DERR:
5715 case GAUDI_EVENT_AXI_ECC:
5716 case GAUDI_EVENT_L2_RAM_ECC:
5717 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5718 gaudi_print_irq_info(hdev, event_type, false);
5719 if (hdev->hard_reset_on_fw_events)
5720 hl_device_reset(hdev, true, false);
5721 break;
5722
5723 case GAUDI_EVENT_HBM0_SPI_0:
5724 case GAUDI_EVENT_HBM1_SPI_0:
5725 case GAUDI_EVENT_HBM2_SPI_0:
5726 case GAUDI_EVENT_HBM3_SPI_0:
5727 gaudi_print_irq_info(hdev, event_type, false);
5728 gaudi_hbm_read_interrupts(hdev,
5729 gaudi_hbm_event_to_dev(event_type));
5730 if (hdev->hard_reset_on_fw_events)
5731 hl_device_reset(hdev, true, false);
5732 break;
5733
5734 case GAUDI_EVENT_HBM0_SPI_1:
5735 case GAUDI_EVENT_HBM1_SPI_1:
5736 case GAUDI_EVENT_HBM2_SPI_1:
5737 case GAUDI_EVENT_HBM3_SPI_1:
5738 gaudi_print_irq_info(hdev, event_type, false);
5739 gaudi_hbm_read_interrupts(hdev,
5740 gaudi_hbm_event_to_dev(event_type));
5741 break;
5742
5743 case GAUDI_EVENT_TPC0_DEC:
5744 case GAUDI_EVENT_TPC1_DEC:
5745 case GAUDI_EVENT_TPC2_DEC:
5746 case GAUDI_EVENT_TPC3_DEC:
5747 case GAUDI_EVENT_TPC4_DEC:
5748 case GAUDI_EVENT_TPC5_DEC:
5749 case GAUDI_EVENT_TPC6_DEC:
5750 case GAUDI_EVENT_TPC7_DEC:
5751 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005752 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005753 tpc_dec_event_to_tpc_id(event_type),
5754 "AXI_SLV_DEC_Error");
Oded Gabbay66446822020-05-18 16:48:01 +03005755 if (reset_required) {
5756 dev_err(hdev->dev, "hard reset required due to %s\n",
5757 gaudi_irq_map_table[event_type].name);
5758
5759 if (hdev->hard_reset_on_fw_events)
5760 hl_device_reset(hdev, true, false);
5761 } else {
5762 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005763 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005764 break;
5765
5766 case GAUDI_EVENT_TPC0_KRN_ERR:
5767 case GAUDI_EVENT_TPC1_KRN_ERR:
5768 case GAUDI_EVENT_TPC2_KRN_ERR:
5769 case GAUDI_EVENT_TPC3_KRN_ERR:
5770 case GAUDI_EVENT_TPC4_KRN_ERR:
5771 case GAUDI_EVENT_TPC5_KRN_ERR:
5772 case GAUDI_EVENT_TPC6_KRN_ERR:
5773 case GAUDI_EVENT_TPC7_KRN_ERR:
5774 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005775 reset_required = gaudi_tpc_read_interrupts(hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005776 tpc_krn_event_to_tpc_id(event_type),
5777 "KRN_ERR");
Oded Gabbay66446822020-05-18 16:48:01 +03005778 if (reset_required) {
5779 dev_err(hdev->dev, "hard reset required due to %s\n",
5780 gaudi_irq_map_table[event_type].name);
5781
5782 if (hdev->hard_reset_on_fw_events)
5783 hl_device_reset(hdev, true, false);
5784 } else {
5785 hl_fw_unmask_irq(hdev, event_type);
Omer Shpigelmand7985072020-05-17 23:01:22 +03005786 }
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005787 break;
5788
5789 case GAUDI_EVENT_PCIE_CORE_SERR:
5790 case GAUDI_EVENT_PCIE_IF_SERR:
5791 case GAUDI_EVENT_PCIE_PHY_SERR:
5792 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5793 case GAUDI_EVENT_MME0_ACC_SERR:
5794 case GAUDI_EVENT_MME0_SBAB_SERR:
5795 case GAUDI_EVENT_MME1_ACC_SERR:
5796 case GAUDI_EVENT_MME1_SBAB_SERR:
5797 case GAUDI_EVENT_MME2_ACC_SERR:
5798 case GAUDI_EVENT_MME2_SBAB_SERR:
5799 case GAUDI_EVENT_MME3_ACC_SERR:
5800 case GAUDI_EVENT_MME3_SBAB_SERR:
5801 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5802 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5803 case GAUDI_EVENT_PSOC_MEM_SERR:
5804 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5805 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5806 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5807 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5808 fallthrough;
5809 case GAUDI_EVENT_MMU_SERR:
5810 case GAUDI_EVENT_PCIE_DEC:
5811 case GAUDI_EVENT_MME0_WBC_RSP:
5812 case GAUDI_EVENT_MME0_SBAB0_RSP:
5813 case GAUDI_EVENT_MME1_WBC_RSP:
5814 case GAUDI_EVENT_MME1_SBAB0_RSP:
5815 case GAUDI_EVENT_MME2_WBC_RSP:
5816 case GAUDI_EVENT_MME2_SBAB0_RSP:
5817 case GAUDI_EVENT_MME3_WBC_RSP:
5818 case GAUDI_EVENT_MME3_SBAB0_RSP:
5819 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5820 case GAUDI_EVENT_PSOC_AXI_DEC:
5821 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5822 case GAUDI_EVENT_MMU_PAGE_FAULT:
5823 case GAUDI_EVENT_MMU_WR_PERM:
5824 case GAUDI_EVENT_RAZWI_OR_ADC:
5825 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5826 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5827 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5828 fallthrough;
5829 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5830 gaudi_print_irq_info(hdev, event_type, true);
5831 gaudi_handle_qman_err(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005832 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005833 break;
5834
5835 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5836 gaudi_print_irq_info(hdev, event_type, true);
Oded Gabbay66446822020-05-18 16:48:01 +03005837 if (hdev->hard_reset_on_fw_events)
5838 hl_device_reset(hdev, true, false);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005839 break;
5840
5841 case GAUDI_EVENT_TPC0_BMON_SPMU:
5842 case GAUDI_EVENT_TPC1_BMON_SPMU:
5843 case GAUDI_EVENT_TPC2_BMON_SPMU:
5844 case GAUDI_EVENT_TPC3_BMON_SPMU:
5845 case GAUDI_EVENT_TPC4_BMON_SPMU:
5846 case GAUDI_EVENT_TPC5_BMON_SPMU:
5847 case GAUDI_EVENT_TPC6_BMON_SPMU:
5848 case GAUDI_EVENT_TPC7_BMON_SPMU:
5849 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5850 gaudi_print_irq_info(hdev, event_type, false);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005851 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005852 break;
5853
5854 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5855 gaudi_print_clk_change_info(hdev, event_type);
Ofir Bittonebd8d122020-05-10 13:41:28 +03005856 hl_fw_unmask_irq(hdev, event_type);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005857 break;
5858
5859 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5860 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5861 dev_err(hdev->dev,
5862 "Received high temp H/W interrupt %d (cause %d)\n",
5863 event_type, cause);
5864 break;
5865
5866 default:
5867 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5868 event_type);
5869 break;
5870 }
5871}
5872
5873static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5874 u32 *size)
5875{
5876 struct gaudi_device *gaudi = hdev->asic_specific;
5877
5878 if (aggregate) {
5879 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5880 return gaudi->events_stat_aggregate;
5881 }
5882
5883 *size = (u32) sizeof(gaudi->events_stat);
5884 return gaudi->events_stat;
5885}
5886
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005887static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005888 u32 flags)
5889{
5890 struct gaudi_device *gaudi = hdev->asic_specific;
5891 u32 status, timeout_usec;
5892 int rc;
5893
5894 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5895 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005896 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005897
5898 if (hdev->pldm)
5899 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5900 else
5901 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5902
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005903 mutex_lock(&hdev->mmu_cache_lock);
5904
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005905 /* L0 & L1 invalidation */
Omer Shpigelmancfd41762020-06-03 13:03:35 +03005906 WREG32(mmSTLB_INV_PS, 3);
5907 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005908 WREG32(mmSTLB_INV_PS, 2);
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005909
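	/* The HW presumably clears STLB_INV_PS once the invalidation completes,
	 * which is what the poll below waits for.
	 */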
5910 rc = hl_poll_timeout(
5911 hdev,
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005912 mmSTLB_INV_PS,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005913 status,
5914 !status,
5915 1000,
5916 timeout_usec);
5917
Omer Shpigelman42d0b0b2020-05-17 17:35:39 +03005918 WREG32(mmSTLB_INV_SET, 0);
5919
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005920 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005921
5922 if (rc) {
5923 dev_err_ratelimited(hdev->dev,
5924 "MMU cache invalidation timeout\n");
5925 hl_device_reset(hdev, true, false);
5926 }
5927
5928 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005929}
5930
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005931static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005932 bool is_hard, u32 asid, u64 va, u64 size)
5933{
5934 struct gaudi_device *gaudi = hdev->asic_specific;
5935 u32 status, timeout_usec;
5936 u32 inv_data;
5937 u32 pi;
5938 int rc;
5939
5940 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5941 hdev->hard_reset_pending)
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005942 return 0;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005943
5944 mutex_lock(&hdev->mmu_cache_lock);
5945
5946 if (hdev->pldm)
5947 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5948 else
5949 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5950
5951 /*
5952 * TODO: currently invalidate entire L0 & L1 as in regular hard
5953 * invalidation. Need to apply invalidation of specific cache
5954 * lines with mask of ASID & VA & size.
5955 * Note that L1 will be flushed entirely in any case.
5956 */
5957
5958 /* L0 & L1 invalidation */
5959 inv_data = RREG32(mmSTLB_CACHE_INV);
5960 /* PI is 8 bit */
5961 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5962 WREG32(mmSTLB_CACHE_INV,
5963 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5964
5965 rc = hl_poll_timeout(
5966 hdev,
5967 mmSTLB_INV_CONSUMER_INDEX,
5968 status,
5969 status == pi,
5970 1000,
5971 timeout_usec);
5972
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005973 mutex_unlock(&hdev->mmu_cache_lock);
Omer Shpigelman8ff5f4f2020-05-24 23:06:59 +03005974
5975 if (rc) {
5976 dev_err_ratelimited(hdev->dev,
5977 "MMU cache invalidation timeout\n");
5978 hl_device_reset(hdev, true, false);
5979 }
5980
5981 return rc;
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03005982}
5983
5984static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5985 u32 asid, u64 phys_addr)
5986{
5987 u32 status, timeout_usec;
5988 int rc;
5989
5990 if (hdev->pldm)
5991 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5992 else
5993 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5994
5995 WREG32(MMU_ASID, asid);
5996 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5997 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
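	/* Kick the MMU to latch the new hop0 address; the HW is expected to
	 * clear the busy bit (bit 31) once the update is done.
	 */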
5998 WREG32(MMU_BUSY, 0x80000000);
5999
6000 rc = hl_poll_timeout(
6001 hdev,
6002 MMU_BUSY,
6003 status,
6004 !(status & 0x80000000),
6005 1000,
6006 timeout_usec);
6007
6008 if (rc) {
6009 dev_err(hdev->dev,
6010 "Timeout during MMU hop0 config of asid %d\n", asid);
6011 return rc;
6012 }
6013
6014 return 0;
6015}
6016
6017static int gaudi_send_heartbeat(struct hl_device *hdev)
6018{
6019 struct gaudi_device *gaudi = hdev->asic_specific;
6020
6021 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6022 return 0;
6023
6024 return hl_fw_send_heartbeat(hdev);
6025}
6026
6027static int gaudi_armcp_info_get(struct hl_device *hdev)
6028{
6029 struct gaudi_device *gaudi = hdev->asic_specific;
6030 struct asic_fixed_properties *prop = &hdev->asic_prop;
6031 int rc;
6032
6033 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6034 return 0;
6035
6036 rc = hl_fw_armcp_info_get(hdev);
6037 if (rc)
6038 return rc;
6039
6040 if (!strlen(prop->armcp_info.card_name))
6041 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6042 CARD_NAME_MAX_LEN);
6043
6044 return 0;
6045}
6046
6047static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
6048 struct seq_file *s)
6049{
6050 struct gaudi_device *gaudi = hdev->asic_specific;
6051 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6052 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6053 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6054 bool is_idle = true, is_eng_idle, is_slave;
6055 u64 offset;
6056 int i, dma_id;
6057
6058 mutex_lock(&gaudi->clk_gate_mutex);
6059
6060 hdev->asic_funcs->disable_clock_gating(hdev);
6061
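	/* Each engine contributes one bit to the caller's mask: a set bit means
	 * the corresponding engine is busy (not idle).
	 */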
6062 if (s)
6063 seq_puts(s,
6064 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6065 "--- ------- ------------ ---------- -------------\n");
6066
6067 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6068 dma_id = gaudi_dma_assignment[i];
6069 offset = dma_id * DMA_QMAN_OFFSET;
6070
6071 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6072 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6073 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6074 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6075 IS_DMA_IDLE(dma_core_sts0);
6076 is_idle &= is_eng_idle;
6077
6078 if (mask)
6079 *mask |= !is_eng_idle <<
6080 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6081 if (s)
6082 seq_printf(s, fmt, dma_id,
6083 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6084 qm_cgm_sts, dma_core_sts0);
6085 }
6086
6087 if (s)
6088 seq_puts(s,
6089 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6090 "--- ------- ------------ ---------- ----------\n");
6091
6092 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6093 offset = i * TPC_QMAN_OFFSET;
6094 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6095 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6096 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6097 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6098 IS_TPC_IDLE(tpc_cfg_sts);
6099 is_idle &= is_eng_idle;
6100
6101 if (mask)
6102 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
6103 if (s)
6104 seq_printf(s, fmt, i,
6105 is_eng_idle ? "Y" : "N",
6106 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6107 }
6108
6109 if (s)
6110 seq_puts(s,
6111 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6112 "--- ------- ------------ ---------- -----------\n");
6113
6114 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6115 offset = i * MME_QMAN_OFFSET;
6116 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6117 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6118
6119 /* MME 1 & 3 are slaves, no need to check their QMANs */
6120 is_slave = i % 2;
6121 if (!is_slave) {
6122 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6123 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6124 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6125 }
6126
6127 is_idle &= is_eng_idle;
6128
6129 if (mask)
6130 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
6131 if (s) {
6132 if (!is_slave)
6133 seq_printf(s, fmt, i,
6134 is_eng_idle ? "Y" : "N",
6135 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6136 else
6137 seq_printf(s, mme_slave_fmt, i,
6138 is_eng_idle ? "Y" : "N", "-",
6139 "-", mme_arch_sts);
6140 }
6141 }
6142
6143 if (s)
6144 seq_puts(s, "\n");
6145
6146 hdev->asic_funcs->enable_clock_gating(hdev);
6147
6148 mutex_unlock(&gaudi->clk_gate_mutex);
6149
6150 return is_idle;
6151}
6152
6153static void gaudi_hw_queues_lock(struct hl_device *hdev)
6154 __acquires(&gaudi->hw_queues_lock)
6155{
6156 struct gaudi_device *gaudi = hdev->asic_specific;
6157
6158 spin_lock(&gaudi->hw_queues_lock);
6159}
6160
6161static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6162 __releases(&gaudi->hw_queues_lock)
6163{
6164 struct gaudi_device *gaudi = hdev->asic_specific;
6165
6166 spin_unlock(&gaudi->hw_queues_lock);
6167}
6168
6169static u32 gaudi_get_pci_id(struct hl_device *hdev)
6170{
6171 return hdev->pdev->device;
6172}
6173
6174static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6175 size_t max_size)
6176{
6177 struct gaudi_device *gaudi = hdev->asic_specific;
6178
6179 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6180 return 0;
6181
6182 return hl_fw_get_eeprom_data(hdev, data, max_size);
6183}
6184
6185/*
6186 * this function should be used only during initialization and/or after reset,
6187 * when there are no active users.
6188 */
6189static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6190 u32 tpc_id)
6191{
6192 struct gaudi_device *gaudi = hdev->asic_specific;
6193 u64 kernel_timeout;
6194 u32 status, offset;
6195 int rc;
6196
6197 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6198
6199 if (hdev->pldm)
6200 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6201 else
6202 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6203
6204 mutex_lock(&gaudi->clk_gate_mutex);
6205
6206 hdev->asic_funcs->disable_clock_gating(hdev);
6207
6208 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6209 lower_32_bits(tpc_kernel));
6210 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6211 upper_32_bits(tpc_kernel));
6212
6213 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6214 lower_32_bits(tpc_kernel));
6215 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6216 upper_32_bits(tpc_kernel));
6217 /* set a valid LUT pointer, content is of no significance */
6218 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6219 lower_32_bits(tpc_kernel));
6220 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6221 upper_32_bits(tpc_kernel));
6222
6223 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6224 lower_32_bits(CFG_BASE +
6225 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6226
6227 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6228 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6229 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6230 /* wait a bit for the engine to start executing */
6231 usleep_range(1000, 1500);
6232
6233 /* wait until engine has finished executing */
6234 rc = hl_poll_timeout(
6235 hdev,
6236 mmTPC0_CFG_STATUS + offset,
6237 status,
6238 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6239 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6240 1000,
6241 kernel_timeout);
6242
6243 if (rc) {
6244 dev_err(hdev->dev,
6245 "Timeout while waiting for TPC%d icache prefetch\n",
6246 tpc_id);
6247 hdev->asic_funcs->enable_clock_gating(hdev);
6248 mutex_unlock(&gaudi->clk_gate_mutex);
6249 return -EIO;
6250 }
6251
6252 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6253 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6254
6255 /* wait a bit for the engine to start executing */
6256 usleep_range(1000, 1500);
6257
6258 /* wait until engine has finished executing */
6259 rc = hl_poll_timeout(
6260 hdev,
6261 mmTPC0_CFG_STATUS + offset,
6262 status,
6263 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6264 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6265 1000,
6266 kernel_timeout);
6267
6268 rc = hl_poll_timeout(
6269 hdev,
6270 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6271 status,
6272 (status == 0),
6273 1000,
6274 kernel_timeout);
6275
6276 hdev->asic_funcs->enable_clock_gating(hdev);
6277 mutex_unlock(&gaudi->clk_gate_mutex);
6278
6279 if (rc) {
6280 dev_err(hdev->dev,
6281 "Timeout while waiting for TPC%d kernel to execute\n",
6282 tpc_id);
6283 return -EIO;
6284 }
6285
6286 return 0;
6287}
6288
6289static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6290{
6291 return RREG32(mmHW_STATE);
6292}
6293
6294static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6295{
6296 return gaudi_cq_assignment[cq_idx];
6297}
6298
6299static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
6300{
6301 struct gaudi_device *gaudi = hdev->asic_specific;
6302 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
6303 struct hl_hw_sob *hw_sob;
6304 int sob, ext_idx = gaudi->ext_queue_idx++;
6305
6306 /*
6307 * The external queues might not sit sequentially, hence use the
6308 * real external queue index for the SOB/MON base id.
6309 */
6310 hw_queue->base_sob_id = ext_idx * HL_RSVD_SOBS;
6311 hw_queue->base_mon_id = ext_idx * HL_RSVD_MONS;
6312 hw_queue->next_sob_val = 1;
6313 hw_queue->curr_sob_offset = 0;
6314
6315 for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
6316 hw_sob = &hw_queue->hw_sob[sob];
6317 hw_sob->hdev = hdev;
6318 hw_sob->sob_id = hw_queue->base_sob_id + sob;
6319 hw_sob->q_idx = q_idx;
6320 kref_init(&hw_sob->kref);
6321 }
6322}
6323
6324static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
6325{
6326 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
6327
6328 /*
6329 * In case we got here due to a stuck CS, the refcnt might be bigger
6330 * than 1 and therefore we reset it.
6331 */
6332 kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
6333 hw_queue->curr_sob_offset = 0;
6334 hw_queue->next_sob_val = 1;
6335}
6336
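/*
 * CB size helpers: a signal CB carries a single MSG_SHORT packet (SOB
 * increment), while a wait CB carries four MSG_SHORT packets (monitor setup
 * and arm) plus a FENCE packet. Both presumably reserve room for the two
 * MSG_PROT completion packets appended by add_end_of_cb_packets().
 */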
6337static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6338{
6339 return sizeof(struct packet_msg_short) +
6340 sizeof(struct packet_msg_prot) * 2;
6341}
6342
6343static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6344{
6345 return sizeof(struct packet_msg_short) * 4 +
6346 sizeof(struct packet_fence) +
6347 sizeof(struct packet_msg_prot) * 2;
6348}
6349
6350static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6351{
6352 struct hl_cb *cb = (struct hl_cb *) data;
6353 struct packet_msg_short *pkt;
6354 u32 value, ctl;
6355
6356 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6357 memset(pkt, 0, sizeof(*pkt));
6358
6359 value = 1 << GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT; /* inc by 1 */
6360 value |= 1 << GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT; /* add mode */
6361
6362 ctl = (sob_id * 4) << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; /* SOB id */
6363 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6364 ctl |= 3 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S SOB base */
6365 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6366 ctl |= 1 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6367 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6368 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6369
6370 pkt->value = cpu_to_le32(value);
6371 pkt->ctl = cpu_to_le32(ctl);
6372}
6373
6374static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6375 u16 addr)
6376{
6377 u32 ctl, pkt_size = sizeof(*pkt);
6378
6379 memset(pkt, 0, pkt_size);
6380
6381 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6382 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6383 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6384 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6385 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6386 ctl |= 0 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; /* only last pkt needs MB */
6387
6388 pkt->value = cpu_to_le32(value);
6389 pkt->ctl = cpu_to_le32(ctl);
6390
6391 return pkt_size;
6392}
6393
6394static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6395 u16 sob_val, u16 addr)
6396{
6397 u32 ctl, value, pkt_size = sizeof(*pkt);
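	/* A monitor presumably watches a group of 8 SOBs; clearing the bit of
	 * the requested SOB in the mask selects it for the comparison.
	 */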
6398 u8 mask = ~(1 << (sob_id & 0x7));
6399
6400 memset(pkt, 0, pkt_size);
6401
6402 value = (sob_id / 8) << GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_SHIFT;
6403 value |= sob_val << GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_SHIFT;
6404 value |= 0 << GAUDI_PKT_SHORT_VAL_MON_MODE_SHIFT; /* GREATER_OR_EQUAL */
6405 value |= mask << GAUDI_PKT_SHORT_VAL_MON_MASK_SHIFT;
6406
6407 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6408 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6409 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6410 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6411 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6412 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6413 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6414
6415 pkt->value = cpu_to_le32(value);
6416 pkt->ctl = cpu_to_le32(ctl);
6417
6418 return pkt_size;
6419}
6420
6421static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6422{
6423 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6424
6425 memset(pkt, 0, pkt_size);
6426
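	/* Arm fence #2 to wait until its value reaches 1 and then decrement it
	 * by 1; this matches the CP_FENCE2_RDATA registers that the monitor
	 * payload in gaudi_gen_wait_cb() writes to.
	 */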
6427 cfg = 1 << GAUDI_PKT_FENCE_CFG_DEC_VAL_SHIFT;
6428 cfg |= 1 << GAUDI_PKT_FENCE_CFG_TARGET_VAL_SHIFT;
6429 cfg |= 2 << GAUDI_PKT_FENCE_CFG_ID_SHIFT;
6430
6431 ctl = 0 << GAUDI_PKT_FENCE_CTL_PRED_SHIFT;
6432 ctl |= PACKET_FENCE << GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT;
6433 ctl |= 0 << GAUDI_PKT_FENCE_CTL_EB_SHIFT;
6434 ctl |= 1 << GAUDI_PKT_FENCE_CTL_RB_SHIFT;
6435 ctl |= 1 << GAUDI_PKT_FENCE_CTL_MB_SHIFT;
6436
6437 pkt->cfg = cpu_to_le32(cfg);
6438 pkt->ctl = cpu_to_le32(ctl);
6439
6440 return pkt_size;
6441}
6442
6443static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6444 u16 sob_val, u16 mon_id, u32 q_idx)
6445{
6446 struct hl_cb *cb = (struct hl_cb *) data;
6447 void *buf = (void *) (uintptr_t) cb->kernel_address;
6448 u64 monitor_base, fence_addr = 0;
6449 u32 size = 0;
6450 u16 msg_addr_offset;
6451
6452 switch (q_idx) {
6453 case GAUDI_QUEUE_ID_DMA_0_0:
6454 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6455 break;
6456 case GAUDI_QUEUE_ID_DMA_0_1:
6457 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6458 break;
6459 case GAUDI_QUEUE_ID_DMA_0_2:
6460 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6461 break;
6462 case GAUDI_QUEUE_ID_DMA_0_3:
6463 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6464 break;
6465 case GAUDI_QUEUE_ID_DMA_1_0:
6466 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6467 break;
6468 case GAUDI_QUEUE_ID_DMA_1_1:
6469 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6470 break;
6471 case GAUDI_QUEUE_ID_DMA_1_2:
6472 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6473 break;
6474 case GAUDI_QUEUE_ID_DMA_1_3:
6475 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6476 break;
6477 case GAUDI_QUEUE_ID_DMA_5_0:
6478 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6479 break;
6480 case GAUDI_QUEUE_ID_DMA_5_1:
6481 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6482 break;
6483 case GAUDI_QUEUE_ID_DMA_5_2:
6484 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6485 break;
6486 case GAUDI_QUEUE_ID_DMA_5_3:
6487 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6488 break;
6489 default:
6490 /* queue index should be valid here */
6491 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6492 q_idx);
6493 return;
6494 }
6495
6496 fence_addr += CFG_BASE;
6497
6498 /*
6499 * monitor_base should be the content of the base0 address registers,
6500 * so it will be added to the msg short offsets
6501 */
6502 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6503
6504 /* First monitor config packet: low address of the sync */
6505 msg_addr_offset =
6506 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6507 monitor_base;
6508
6509 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6510 msg_addr_offset);
6511
6512 /* Second monitor config packet: high address of the sync */
6513 msg_addr_offset =
6514 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6515 monitor_base;
6516
6517 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6518 msg_addr_offset);
6519
6520 /*
6521 * Third monitor config packet: the payload, i.e. what to write when the
6522 * sync triggers
6523 */
6524 msg_addr_offset =
6525 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6526 monitor_base;
6527
6528 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6529
6530 /* Fourth monitor config packet: bind the monitor to a sync object */
6531 msg_addr_offset =
6532 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6533 monitor_base;
6534 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6535 msg_addr_offset);
6536
6537 /* Fence packet */
6538 size += gaudi_add_fence_pkt(buf + size);
6539}
6540
6541static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6542{
6543 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6544
6545 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6546 hw_sob->sob_id);
6547
6548 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6549 0);
6550
6551 kref_init(&hw_sob->kref);
6552}
6553
6554static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6555{
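	/* The boot firmware presumably leaves HL_POWER9_HOST_MAGIC in this
	 * scratchpad when the host (POWER9) supports 64-bit DMA addressing;
	 * otherwise fall back to the default 48-bit DMA mask.
	 */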
6556 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6557 HL_POWER9_HOST_MAGIC) {
6558 hdev->power9_64bit_dma_enable = 1;
6559 hdev->dma_mask = 64;
6560 } else {
6561 hdev->power9_64bit_dma_enable = 0;
6562 hdev->dma_mask = 48;
6563 }
6564}
6565
6566static u64 gaudi_get_device_time(struct hl_device *hdev)
6567{
6568 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6569
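	/* Note: the two 32-bit timestamp halves are read non-atomically, so a
	 * carry between the two reads is not accounted for here.
	 */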
6570 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6571}
6572
6573static const struct hl_asic_funcs gaudi_funcs = {
6574 .early_init = gaudi_early_init,
6575 .early_fini = gaudi_early_fini,
6576 .late_init = gaudi_late_init,
6577 .late_fini = gaudi_late_fini,
6578 .sw_init = gaudi_sw_init,
6579 .sw_fini = gaudi_sw_fini,
6580 .hw_init = gaudi_hw_init,
6581 .hw_fini = gaudi_hw_fini,
6582 .halt_engines = gaudi_halt_engines,
6583 .suspend = gaudi_suspend,
6584 .resume = gaudi_resume,
6585 .cb_mmap = gaudi_cb_mmap,
6586 .ring_doorbell = gaudi_ring_doorbell,
6587 .pqe_write = gaudi_pqe_write,
6588 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6589 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6590 .get_int_queue_base = gaudi_get_int_queue_base,
6591 .test_queues = gaudi_test_queues,
6592 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6593 .asic_dma_pool_free = gaudi_dma_pool_free,
6594 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6595 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6596 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6597 .cs_parser = gaudi_cs_parser,
6598 .asic_dma_map_sg = gaudi_dma_map_sg,
6599 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6600 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6601 .update_eq_ci = gaudi_update_eq_ci,
6602 .context_switch = gaudi_context_switch,
6603 .restore_phase_topology = gaudi_restore_phase_topology,
6604 .debugfs_read32 = gaudi_debugfs_read32,
6605 .debugfs_write32 = gaudi_debugfs_write32,
6606 .debugfs_read64 = gaudi_debugfs_read64,
6607 .debugfs_write64 = gaudi_debugfs_write64,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006608 .add_device_attr = gaudi_add_device_attr,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006609 .handle_eqe = gaudi_handle_eqe,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006610 .set_pll_profile = gaudi_set_pll_profile,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006611 .get_events_stat = gaudi_get_events_stat,
6612 .read_pte = gaudi_read_pte,
6613 .write_pte = gaudi_write_pte,
6614 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6615 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6616 .send_heartbeat = gaudi_send_heartbeat,
6617 .enable_clock_gating = gaudi_enable_clock_gating,
6618 .disable_clock_gating = gaudi_disable_clock_gating,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03006619 .debug_coresight = gaudi_debug_coresight,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006620 .is_device_idle = gaudi_is_device_idle,
6621 .soft_reset_late_init = gaudi_soft_reset_late_init,
6622 .hw_queues_lock = gaudi_hw_queues_lock,
6623 .hw_queues_unlock = gaudi_hw_queues_unlock,
6624 .get_pci_id = gaudi_get_pci_id,
6625 .get_eeprom_data = gaudi_get_eeprom_data,
6626 .send_cpu_message = gaudi_send_cpu_message,
6627 .get_hw_state = gaudi_get_hw_state,
6628 .pci_bars_map = gaudi_pci_bars_map,
6629 .set_dram_bar_base = gaudi_set_hbm_bar_base,
6630 .init_iatu = gaudi_init_iatu,
6631 .rreg = hl_rreg,
6632 .wreg = hl_wreg,
Omer Shpigelman79fc7a92020-05-11 10:46:29 +03006633 .halt_coresight = gaudi_halt_coresight,
Oded Gabbaybcaf4152020-05-11 10:41:37 +03006634 .get_clk_rate = gaudi_get_clk_rate,
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006635 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6636 .read_device_fw_version = gaudi_read_device_fw_version,
6637 .load_firmware_to_device = gaudi_load_firmware_to_device,
6638 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6639 .ext_queue_init = gaudi_ext_queue_init,
6640 .ext_queue_reset = gaudi_ext_queue_reset,
6641 .get_signal_cb_size = gaudi_get_signal_cb_size,
6642 .get_wait_cb_size = gaudi_get_wait_cb_size,
6643 .gen_signal_cb = gaudi_gen_signal_cb,
6644 .gen_wait_cb = gaudi_gen_wait_cb,
6645 .reset_sob = gaudi_reset_sob,
6646 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6647 .get_device_time = gaudi_get_device_time
6648};
6649
6650/**
6651 * gaudi_set_asic_funcs - set GAUDI function pointers
6652 *
Lee Jonesf7d227c2020-07-01 09:58:42 +01006653 * @hdev: pointer to hl_device structure
Oded Gabbayac0ae6a2020-05-11 10:29:27 +03006654 *
6655 */
6656void gaudi_set_asic_funcs(struct hl_device *hdev)
6657{
6658 hdev->asic_funcs = &gaudi_funcs;
6659}